Spaces:
Sleeping
Sleeping
Commit ·
a8aea21
0
Parent(s):
Final Release: CampusGen AI Pipeline & Compositor
Browse files- .gitattributes +5 -0
- .gitignore +32 -0
- ai-toolkit +1 -0
- assets/fonts/Montserrat-Bold.ttf +3 -0
- assets/fonts/Montserrat-ExtraBold.ttf +3 -0
- assets/fonts/Montserrat-Medium.ttf +3 -0
- assets/fonts/Montserrat-Regular.ttf +3 -0
- assets/fonts/PlayfairDisplay-Bold.ttf +3 -0
- assets/fonts/PlayfairDisplay-Regular.ttf +3 -0
- configs/config.yaml +637 -0
- configs/train_sdxl_lora.yaml +84 -0
- configs/train_sdxl_lora_phase2.yaml +103 -0
- configs/train_sdxl_lora_phase3.yaml +86 -0
- deployment/README.md +84 -0
- deployment/app.py +663 -0
- deployment/pipelines.py +303 -0
- deployment/prompt_engine.py +267 -0
- deployment/requirements.txt +33 -0
- docs/CAMPUS-AI-PROJECT-BRIEF.md +316 -0
- docs/NOVELTY.md +225 -0
- docs/PIPELINE.md +179 -0
- docs/README.md +122 -0
- docs/SETUP.md +198 -0
- docs/architecture.html +1004 -0
- requirements.txt +57 -0
- scripts/augment_specific_v3.py +162 -0
- scripts/caption_generator.py +379 -0
- scripts/count_images.sh +60 -0
- scripts/count_splits.py +59 -0
- scripts/create_training_config.py +209 -0
- scripts/deploy_to_hf.py +56 -0
- scripts/image_deduplicator.py +152 -0
- scripts/monitor_downloads.py +63 -0
- scripts/pinterest_scraper.py +862 -0
- scripts/pinterest_tuning_scraper.py +368 -0
- scripts/poster_compositor.py +507 -0
- scripts/quality_filter.py +562 -0
- scripts/split_dataset.py +136 -0
- scripts/targeted_filter.py +167 -0
- scripts/targeted_scraper.py +72 -0
- scripts/test_checkpoint.py +371 -0
- scripts/tuning_dataset.py +518 -0
.gitattributes
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.ttf filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.otf filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python Cache
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# Virtual Environments
|
| 7 |
+
venv/
|
| 8 |
+
env/
|
| 9 |
+
.env
|
| 10 |
+
|
| 11 |
+
# Data and Models
|
| 12 |
+
data/
|
| 13 |
+
models/
|
| 14 |
+
output/
|
| 15 |
+
dataset/
|
| 16 |
+
|
| 17 |
+
# Deep Learning Frameworks
|
| 18 |
+
*.safetensors
|
| 19 |
+
*.pt
|
| 20 |
+
*.pth
|
| 21 |
+
*.ckpt
|
| 22 |
+
*.onnx
|
| 23 |
+
|
| 24 |
+
# Editor
|
| 25 |
+
.vscode/
|
| 26 |
+
.idea/
|
| 27 |
+
*.swp
|
| 28 |
+
|
| 29 |
+
# Logs and DBs
|
| 30 |
+
*.db
|
| 31 |
+
*.log
|
| 32 |
+
*.sqlite3
|
ai-toolkit
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Subproject commit de7d22c9becf5f3385348d9d5ff901536c340d0c
|
assets/fonts/Montserrat-Bold.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc6e854971cea46b463be6f9eef4d9cd52f51cfc1fc0dd90c9d3e6483dc0ec61
|
| 3 |
+
size 454864
|
assets/fonts/Montserrat-ExtraBold.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3ac6a843d3ba6d5cafd44cf39e437055c8aed7e261010f595f57d3c7b3e2c1b
|
| 3 |
+
size 455468
|
assets/fonts/Montserrat-Medium.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dae47428bb041f9716604e0e07b5b0c8585b3bdd8183362f75c69fe7bb3cfaf4
|
| 3 |
+
size 447320
|
assets/fonts/Montserrat-Regular.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e8abe50c44c82e2242e97d1ec8c0d385c4890cdc50447bcdb8605c81a38cfb2
|
| 3 |
+
size 445928
|
assets/fonts/PlayfairDisplay-Bold.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c40f2293766a503bc70cce9e512ef844a4ccb7cbcde792fe2ea31d191917d8d6
|
| 3 |
+
size 300724
|
assets/fonts/PlayfairDisplay-Regular.ttf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5e26dc5e2e77fb2803a0bf02fd4f81ee136ec8dea863ccdb0c59a263b21378b
|
| 3 |
+
size 278688
|
configs/config.yaml
ADDED
|
@@ -0,0 +1,637 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================================
|
| 2 |
+
# CampusGen AI - Master Configuration File
|
| 3 |
+
# All scripts reference this file - NO HARDCODING ALLOWED
|
| 4 |
+
# ============================================================================
|
| 5 |
+
|
| 6 |
+
project:
|
| 7 |
+
name: "CampusGen AI"
|
| 8 |
+
version: "1.0.0"
|
| 9 |
+
creator: "M Runeet Kumar"
|
| 10 |
+
location: "Ashta/Indore, MP, India"
|
| 11 |
+
start_date: "2026-02-13"
|
| 12 |
+
seed: 42 # Master random seed for reproducibility
|
| 13 |
+
|
| 14 |
+
# ============================================================================
|
| 15 |
+
# HARDWARE CONFIGURATION
|
| 16 |
+
# ============================================================================
|
| 17 |
+
hardware:
|
| 18 |
+
gpu:
|
| 19 |
+
name: "RTX 5070 Ti"
|
| 20 |
+
vram_gb: 12
|
| 21 |
+
cuda_version: "13.1"
|
| 22 |
+
compute_capability: "12.0" # SM120 (Blackwell)
|
| 23 |
+
system:
|
| 24 |
+
ram_gb: 32
|
| 25 |
+
cpu_cores: 24
|
| 26 |
+
storage_gb: 500
|
| 27 |
+
|
| 28 |
+
# ============================================================================
|
| 29 |
+
# DIRECTORY STRUCTURE
|
| 30 |
+
# ============================================================================
|
| 31 |
+
paths:
|
| 32 |
+
root: "."
|
| 33 |
+
data:
|
| 34 |
+
root: "data"
|
| 35 |
+
raw: "data/raw"
|
| 36 |
+
processed: "data/processed"
|
| 37 |
+
curated: "data/curated"
|
| 38 |
+
train: "data/train"
|
| 39 |
+
val: "data/val"
|
| 40 |
+
test: "data/test"
|
| 41 |
+
tuning: "data/tuning"
|
| 42 |
+
images: "data/images"
|
| 43 |
+
videos: "data/videos"
|
| 44 |
+
audio: "data/audio"
|
| 45 |
+
models:
|
| 46 |
+
root: "models"
|
| 47 |
+
llama:
|
| 48 |
+
base: "models/llama/base"
|
| 49 |
+
lora: "models/llama/lora"
|
| 50 |
+
merged: "models/llama/merged"
|
| 51 |
+
checkpoints: "models/llama/checkpoints"
|
| 52 |
+
sdxl:
|
| 53 |
+
base: "models/sdxl/base"
|
| 54 |
+
lora: "models/sdxl/lora"
|
| 55 |
+
checkpoints: "models/sdxl/checkpoints"
|
| 56 |
+
cogvideo:
|
| 57 |
+
base: "models/cogvideo/base"
|
| 58 |
+
lora: "models/cogvideo/lora"
|
| 59 |
+
voice:
|
| 60 |
+
base: "models/voice"
|
| 61 |
+
outputs:
|
| 62 |
+
root: "outputs"
|
| 63 |
+
images: "outputs/images"
|
| 64 |
+
videos: "outputs/videos"
|
| 65 |
+
combined: "outputs/combined"
|
| 66 |
+
gallery: "outputs/gallery"
|
| 67 |
+
webapp: "outputs/webapp"
|
| 68 |
+
test: "outputs/test"
|
| 69 |
+
logs:
|
| 70 |
+
root: "logs"
|
| 71 |
+
training: "logs/training"
|
| 72 |
+
inference: "logs/inference"
|
| 73 |
+
tensorboard: "logs/tensorboard"
|
| 74 |
+
configs:
|
| 75 |
+
root: "configs"
|
| 76 |
+
|
| 77 |
+
# ============================================================================
|
| 78 |
+
# MODEL CONFIGURATIONS
|
| 79 |
+
# ============================================================================
|
| 80 |
+
models:
|
| 81 |
+
llama:
|
| 82 |
+
# Model source
|
| 83 |
+
repo_id: "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit"
|
| 84 |
+
|
| 85 |
+
# Model parameters
|
| 86 |
+
max_seq_length: 2048
|
| 87 |
+
context_length: 8192
|
| 88 |
+
dtype: "bfloat16"
|
| 89 |
+
load_in_4bit: true
|
| 90 |
+
load_in_8bit: false
|
| 91 |
+
|
| 92 |
+
# Flash attention
|
| 93 |
+
use_flash_attention: true
|
| 94 |
+
attn_implementation: "flash_attention_2"
|
| 95 |
+
|
| 96 |
+
# Memory optimization
|
| 97 |
+
gradient_checkpointing: true
|
| 98 |
+
use_cache: false
|
| 99 |
+
|
| 100 |
+
# Inference parameters
|
| 101 |
+
temperature: 0.7
|
| 102 |
+
top_p: 0.9
|
| 103 |
+
top_k: 50
|
| 104 |
+
repetition_penalty: 1.1
|
| 105 |
+
max_new_tokens: 256
|
| 106 |
+
do_sample: true
|
| 107 |
+
|
| 108 |
+
sdxl:
|
| 109 |
+
# Model source
|
| 110 |
+
repo_id: "stabilityai/stable-diffusion-xl-base-1.0"
|
| 111 |
+
|
| 112 |
+
# Model parameters
|
| 113 |
+
dtype: "bfloat16"
|
| 114 |
+
variant: "fp16" # or "fp32"
|
| 115 |
+
|
| 116 |
+
# Generation parameters
|
| 117 |
+
height: 1024
|
| 118 |
+
width: 1024
|
| 119 |
+
num_inference_steps: 28
|
| 120 |
+
guidance_scale: 5.0
|
| 121 |
+
num_images_per_prompt: 1
|
| 122 |
+
|
| 123 |
+
# Memory optimization
|
| 124 |
+
enable_cpu_offload: false
|
| 125 |
+
enable_attention_slicing: false
|
| 126 |
+
enable_vae_slicing: false
|
| 127 |
+
enable_vae_tiling: false
|
| 128 |
+
|
| 129 |
+
cogvideo:
|
| 130 |
+
repo_id: "THUDM/CogVideoX-5b"
|
| 131 |
+
enabled: false
|
| 132 |
+
num_frames: 49
|
| 133 |
+
fps: 8
|
| 134 |
+
guidance_scale: 6.0
|
| 135 |
+
num_inference_steps: 50
|
| 136 |
+
|
| 137 |
+
voice:
|
| 138 |
+
repo_id: "fishaudio/fish-speech-1.5"
|
| 139 |
+
enabled: false
|
| 140 |
+
language: "hindi-english-mix"
|
| 141 |
+
speed: 1.0
|
| 142 |
+
pitch: 0
|
| 143 |
+
|
| 144 |
+
# ============================================================================
|
| 145 |
+
# DATASET CONFIGURATION
|
| 146 |
+
# ============================================================================
|
| 147 |
+
dataset:
|
| 148 |
+
# Data splits
|
| 149 |
+
splits:
|
| 150 |
+
train: 0.8
|
| 151 |
+
val: 0.1
|
| 152 |
+
test: 0.1
|
| 153 |
+
|
| 154 |
+
# Target samples
|
| 155 |
+
target_samples: 1000
|
| 156 |
+
min_samples: 100
|
| 157 |
+
max_samples: 10000
|
| 158 |
+
|
| 159 |
+
# Image specifications
|
| 160 |
+
image:
|
| 161 |
+
size: [1024, 1024]
|
| 162 |
+
format: "PNG"
|
| 163 |
+
quality: 95
|
| 164 |
+
channels: 3
|
| 165 |
+
|
| 166 |
+
# Quality thresholds
|
| 167 |
+
quality:
|
| 168 |
+
min_score: 0.7
|
| 169 |
+
min_clip_score: 0.25
|
| 170 |
+
max_toxicity: 0.3
|
| 171 |
+
|
| 172 |
+
# Stratification
|
| 173 |
+
stratify_by: "category"
|
| 174 |
+
balance_classes: true
|
| 175 |
+
|
| 176 |
+
# Categories
|
| 177 |
+
categories:
|
| 178 |
+
- "diwali"
|
| 179 |
+
- "holi"
|
| 180 |
+
- "navratri"
|
| 181 |
+
- "eid"
|
| 182 |
+
- "ganesh"
|
| 183 |
+
- "tech_fest"
|
| 184 |
+
- "cultural_fest"
|
| 185 |
+
- "sports"
|
| 186 |
+
- "workshop"
|
| 187 |
+
- "general"
|
| 188 |
+
|
| 189 |
+
# Prompt generation
|
| 190 |
+
prompts:
|
| 191 |
+
cities:
|
| 192 |
+
- "Indore"
|
| 193 |
+
- "Mumbai"
|
| 194 |
+
- "Delhi"
|
| 195 |
+
- "Bangalore"
|
| 196 |
+
- "Hyderabad"
|
| 197 |
+
- "Pune"
|
| 198 |
+
- "Chennai"
|
| 199 |
+
- "Kolkata"
|
| 200 |
+
- "Ahmedabad"
|
| 201 |
+
- "Jaipur"
|
| 202 |
+
colleges:
|
| 203 |
+
- "IIT"
|
| 204 |
+
- "NIT"
|
| 205 |
+
- "IIIT"
|
| 206 |
+
- "Engineering College"
|
| 207 |
+
- "University"
|
| 208 |
+
- "Technical Institute"
|
| 209 |
+
states:
|
| 210 |
+
- "Madhya Pradesh"
|
| 211 |
+
- "Maharashtra"
|
| 212 |
+
- "Karnataka"
|
| 213 |
+
- "Tamil Nadu"
|
| 214 |
+
- "Gujarat"
|
| 215 |
+
|
| 216 |
+
# ============================================================================
|
| 217 |
+
# API KEYS (used by download_dataset.py)
|
| 218 |
+
# ============================================================================
|
| 219 |
+
api_keys:
  # SECURITY (review): live API secrets were committed here in a public repo.
  # All of these keys must be ROTATED immediately and loaded from environment
  # variables (e.g. os.environ / a .env file excluded by .gitignore), never
  # stored in version control.
  kaggle:
    username: "runeetkumarmallarpu"
    key: "REDACTED"  # rotate — read from KAGGLE_KEY env var instead
  unsplash:
    application_id: "873846"
    access_key: "REDACTED"  # rotate — read from UNSPLASH_ACCESS_KEY env var
    secret_key: "REDACTED"  # rotate — read from UNSPLASH_SECRET_KEY env var
  pexels:
    api_key: "REDACTED"  # rotate — read from PEXELS_API_KEY env var
|
| 229 |
+
|
| 230 |
+
# ============================================================================
|
| 231 |
+
# SCRAPING CONFIGURATION
|
| 232 |
+
# ============================================================================
|
| 233 |
+
scraping:
|
| 234 |
+
pinterest:
|
| 235 |
+
max_images_per_query: 200
|
| 236 |
+
scroll_pause_seconds: 2.0
|
| 237 |
+
download_timeout: 15
|
| 238 |
+
min_resolution: 512
|
| 239 |
+
headless: true
|
| 240 |
+
|
| 241 |
+
# ============================================================================
|
| 242 |
+
# DEPLOYMENT CONFIGURATION - HF / GROQ
|
| 243 |
+
# ============================================================================
|
| 244 |
+
deployment_hf:
|
| 245 |
+
hf_username: "YOUR_HF_USERNAME" # ← CHANGE THIS
|
| 246 |
+
lora_repo_name: "campus-ai-poster-lora"
|
| 247 |
+
space_name: "campus-ai-poster-generator"
|
| 248 |
+
groq:
|
| 249 |
+
api_key_env: "GROQ_API_KEY" # reads from env variable
|
| 250 |
+
model: "llama-3.3-70b-versatile"
|
| 251 |
+
|
| 252 |
+
# ============================================================================
|
| 253 |
+
# TRAINING CONFIGURATION - WITH 10 ANTI-OVERFITTING TECHNIQUES
|
| 254 |
+
# ============================================================================
|
| 255 |
+
training:
|
| 256 |
+
# Global training settings
|
| 257 |
+
seed: 42
|
| 258 |
+
deterministic: true
|
| 259 |
+
benchmark: false
|
| 260 |
+
|
| 261 |
+
# Mixed precision training (Technique 1: Stability)
|
| 262 |
+
mixed_precision:
|
| 263 |
+
enabled: true
|
| 264 |
+
dtype: "bf16" # or "fp16"
|
| 265 |
+
opt_level: "O2"
|
| 266 |
+
|
| 267 |
+
# ==========================================================================
|
| 268 |
+
# SDXL LORA TRAINING
|
| 269 |
+
# ==========================================================================
|
| 270 |
+
sdxl_lora:
|
| 271 |
+
# LoRA configuration
|
| 272 |
+
lora:
|
| 273 |
+
rank: 32
|
| 274 |
+
alpha: 16
|
| 275 |
+
dropout: 0.05
|
| 276 |
+
bias: "none"
|
| 277 |
+
task_type: "CAUSAL_LM"
|
| 278 |
+
|
| 279 |
+
# Optimizer settings
|
| 280 |
+
optimizer:
|
| 281 |
+
type: "adamw8bit"
|
| 282 |
+
learning_rate: 1.0e-4
|
| 283 |
+
betas: [0.9, 0.999]
|
| 284 |
+
weight_decay: 0.01
|
| 285 |
+
|
| 286 |
+
# Learning rate scheduler (Technique 4: LR Scheduling)
|
| 287 |
+
scheduler:
|
| 288 |
+
type: "cosine_with_restarts" # Escapes local minima
|
| 289 |
+
warmup_steps: 100
|
| 290 |
+
num_cycles: 3 # 3 restarts across 4 epochs
|
| 291 |
+
min_lr: 1.0e-6
|
| 292 |
+
|
| 293 |
+
# Training hyperparameters
|
| 294 |
+
batch_size: 1
|
| 295 |
+
gradient_accumulation_steps: 4
|
| 296 |
+
effective_batch_size: 4 # batch_size * gradient_accumulation_steps
|
| 297 |
+
max_grad_norm: 1.0 # Technique 5: Gradient Clipping
|
| 298 |
+
epochs: 4
|
| 299 |
+
max_steps: 12800 # 4 epochs × 3200 steps/epoch (matches epochs: 4 above; previous comment's "20 epochs × 2560" = 51,200 did not match this value)
|
| 300 |
+
|
| 301 |
+
# Min-SNR-γ Loss Weighting (Technique 6: Balanced Noise-Level Learning)
|
| 302 |
+
# Prevents model from memorizing easy noise levels and ignoring hard ones.
|
| 303 |
+
# Forces uniform learning across the entire denoising spectrum → generalization.
|
| 304 |
+
# Paper: "Efficient Diffusion Training via Min-SNR Weighting Strategy"
|
| 305 |
+
min_snr_gamma:
|
| 306 |
+
enabled: true
|
| 307 |
+
gamma: 5.0 # Clamps max loss weight; 5.0 is the paper-recommended default
|
| 308 |
+
|
| 309 |
+
# Model checkpointing (Technique 7: Best Model Selection)
|
| 310 |
+
checkpointing:
|
| 311 |
+
enabled: true
|
| 312 |
+
save_strategy: "epoch" # epoch, steps
|
| 313 |
+
save_steps: 100
|
| 314 |
+
save_total_limit: 3
|
| 315 |
+
save_best_only: true
|
| 316 |
+
monitor: "val_loss"
|
| 317 |
+
mode: "min"
|
| 318 |
+
|
| 319 |
+
# Validation (Technique 8: Cross-Validation Monitoring)
|
| 320 |
+
validation:
|
| 321 |
+
enabled: true
|
| 322 |
+
eval_strategy: "epoch" # epoch, steps
|
| 323 |
+
eval_steps: 50
|
| 324 |
+
eval_accumulation_steps: 1
|
| 325 |
+
per_device_eval_batch_size: 1
|
| 326 |
+
|
| 327 |
+
# Data augmentation (Technique 9: Regularization through augmentation)
|
| 328 |
+
augmentation:
|
| 329 |
+
enabled: false # For diffusion models, handled differently
|
| 330 |
+
techniques:
|
| 331 |
+
- "random_horizontal_flip"
|
| 332 |
+
- "color_jitter"
|
| 333 |
+
|
| 334 |
+
# Logging
|
| 335 |
+
logging:
|
| 336 |
+
steps: 10
|
| 337 |
+
report_to: "tensorboard"
|
| 338 |
+
log_level: "info"
|
| 339 |
+
|
| 340 |
+
# Noise scheduling (Technique 10: Progressive training)
|
| 341 |
+
noise_schedule:
|
| 342 |
+
type: "ddpm"
|
| 343 |
+
beta_start: 0.0001
|
| 344 |
+
beta_end: 0.02
|
| 345 |
+
num_train_timesteps: 1000
|
| 346 |
+
|
| 347 |
+
# ==========================================================================
|
| 348 |
+
# LLAMA LORA TRAINING
|
| 349 |
+
# ==========================================================================
|
| 350 |
+
llama_lora:
|
| 351 |
+
# LoRA configuration
|
| 352 |
+
lora:
|
| 353 |
+
rank: 32
|
| 354 |
+
alpha: 32
|
| 355 |
+
dropout: 0.05 # Technique 2: Dropout
|
| 356 |
+
target_modules:
|
| 357 |
+
- "q_proj"
|
| 358 |
+
- "k_proj"
|
| 359 |
+
- "v_proj"
|
| 360 |
+
- "o_proj"
|
| 361 |
+
- "gate_proj"
|
| 362 |
+
- "up_proj"
|
| 363 |
+
- "down_proj"
|
| 364 |
+
bias: "none"
|
| 365 |
+
task_type: "CAUSAL_LM"
|
| 366 |
+
modules_to_save: null
|
| 367 |
+
|
| 368 |
+
# Optimizer settings
|
| 369 |
+
optimizer:
|
| 370 |
+
type: "adamw8bit"
|
| 371 |
+
learning_rate: 2.0e-4
|
| 372 |
+
betas: [0.9, 0.999]
|
| 373 |
+
eps: 1.0e-8
|
| 374 |
+
weight_decay: 0.01 # Technique 3: L2 Regularization
|
| 375 |
+
|
| 376 |
+
# Learning rate scheduler (Technique 4: LR Scheduling)
|
| 377 |
+
scheduler:
|
| 378 |
+
type: "cosine_with_restarts"
|
| 379 |
+
warmup_steps: 50
|
| 380 |
+
warmup_ratio: 0.05
|
| 381 |
+
num_cycles: 2
|
| 382 |
+
min_lr: 1.0e-6
|
| 383 |
+
|
| 384 |
+
# Training hyperparameters
|
| 385 |
+
batch_size: 2
|
| 386 |
+
gradient_accumulation_steps: 4
|
| 387 |
+
effective_batch_size: 8
|
| 388 |
+
max_grad_norm: 1.0 # Technique 5: Gradient Clipping
|
| 389 |
+
epochs: 3
|
| 390 |
+
max_steps: -1
|
| 391 |
+
|
| 392 |
+
# Early stopping (Technique 6: Early Stopping)
|
| 393 |
+
early_stopping:
|
| 394 |
+
enabled: true
|
| 395 |
+
patience: 2
|
| 396 |
+
min_delta: 0.001
|
| 397 |
+
monitor: "eval_loss"
|
| 398 |
+
mode: "min"
|
| 399 |
+
|
| 400 |
+
# Model checkpointing (Technique 7: Best Model Selection)
|
| 401 |
+
checkpointing:
|
| 402 |
+
enabled: true
|
| 403 |
+
save_strategy: "epoch"
|
| 404 |
+
save_steps: 50
|
| 405 |
+
save_total_limit: 2
|
| 406 |
+
save_best_only: true
|
| 407 |
+
monitor: "eval_loss"
|
| 408 |
+
mode: "min"
|
| 409 |
+
|
| 410 |
+
# Validation (Technique 8: Cross-Validation Monitoring)
|
| 411 |
+
validation:
|
| 412 |
+
enabled: true
|
| 413 |
+
eval_strategy: "epoch"
|
| 414 |
+
eval_steps: 25
|
| 415 |
+
eval_accumulation_steps: 1
|
| 416 |
+
per_device_eval_batch_size: 2
|
| 417 |
+
|
| 418 |
+
# Layer-wise learning rate decay (Technique 9: Progressive unfreezing)
|
| 419 |
+
layer_wise_lr:
|
| 420 |
+
enabled: false
|
| 421 |
+
decay_rate: 0.9
|
| 422 |
+
|
| 423 |
+
# Packing and padding (Technique 10: Efficient batching)
|
| 424 |
+
data:
|
| 425 |
+
packing: false
|
| 426 |
+
max_seq_length: 2048
|
| 427 |
+
padding: "max_length"
|
| 428 |
+
truncation: true
|
| 429 |
+
|
| 430 |
+
# Logging
|
| 431 |
+
logging:
|
| 432 |
+
steps: 5
|
| 433 |
+
report_to: "tensorboard"
|
| 434 |
+
log_level: "info"
|
| 435 |
+
log_model: false
|
| 436 |
+
|
| 437 |
+
# ============================================================================
|
| 438 |
+
# INFERENCE CONFIGURATION
|
| 439 |
+
# ============================================================================
|
| 440 |
+
inference:
|
| 441 |
+
# Generation parameters
|
| 442 |
+
num_variants: 4
|
| 443 |
+
batch_size: 1
|
| 444 |
+
max_batch_size: 4
|
| 445 |
+
timeout_seconds: 60
|
| 446 |
+
|
| 447 |
+
# Quality control
|
| 448 |
+
quality:
|
| 449 |
+
enable_filters: true
|
| 450 |
+
min_quality_score: 0.6
|
| 451 |
+
max_toxicity: 0.7
|
| 452 |
+
|
| 453 |
+
# Output settings
|
| 454 |
+
output:
|
| 455 |
+
format: "PNG"
|
| 456 |
+
quality: 95
|
| 457 |
+
save_metadata: true
|
| 458 |
+
enable_watermark: true
|
| 459 |
+
watermark_text: "Generated by CampusGen AI"
|
| 460 |
+
|
| 461 |
+
# Templates
|
| 462 |
+
templates:
|
| 463 |
+
categories:
|
| 464 |
+
- "Technical Fest Poster"
|
| 465 |
+
- "Cultural Event Banner"
|
| 466 |
+
- "Sports Tournament Poster"
|
| 467 |
+
- "Club Recruitment Flyer"
|
| 468 |
+
- "Workshop Announcement"
|
| 469 |
+
- "Assignment Presentation"
|
| 470 |
+
- "Promotional Reel"
|
| 471 |
+
- "Academic Seminar"
|
| 472 |
+
styles:
|
| 473 |
+
- "Vibrant and Energetic"
|
| 474 |
+
- "Elegant and Professional"
|
| 475 |
+
- "Modern Minimalist"
|
| 476 |
+
- "Traditional Indian"
|
| 477 |
+
- "Tech-Futuristic"
|
| 478 |
+
- "Artistic and Creative"
|
| 479 |
+
|
| 480 |
+
# ============================================================================
|
| 481 |
+
# RESPONSIBLE AI CONFIGURATION
|
| 482 |
+
# ============================================================================
|
| 483 |
+
responsibility:
|
| 484 |
+
# Content safety
|
| 485 |
+
toxicity_filter:
|
| 486 |
+
enabled: true
|
| 487 |
+
model: "multilingual"
|
| 488 |
+
threshold: 0.7
|
| 489 |
+
block_threshold: 0.85
|
| 490 |
+
|
| 491 |
+
# Content watermarking
|
| 492 |
+
watermark:
|
| 493 |
+
enabled: true
|
| 494 |
+
type: "C2PA"
|
| 495 |
+
text: "AI Generated - CampusGen AI"
|
| 496 |
+
visible: false
|
| 497 |
+
|
| 498 |
+
# Logging and monitoring
|
| 499 |
+
logging:
|
| 500 |
+
log_all_generations: true
|
| 501 |
+
log_blocked_content: true
|
| 502 |
+
log_path: "logs/inference/safety.log"
|
| 503 |
+
|
| 504 |
+
# Rate limiting
|
| 505 |
+
rate_limit:
|
| 506 |
+
enabled: false
|
| 507 |
+
max_requests_per_hour: 100
|
| 508 |
+
max_requests_per_day: 1000
|
| 509 |
+
|
| 510 |
+
# ============================================================================
|
| 511 |
+
# WEB APPLICATION CONFIGURATION
|
| 512 |
+
# ============================================================================
|
| 513 |
+
webapp:
|
| 514 |
+
# Server settings
|
| 515 |
+
server:
|
| 516 |
+
host: "0.0.0.0"
|
| 517 |
+
port: 7860
|
| 518 |
+
ssl: false
|
| 519 |
+
share: false
|
| 520 |
+
debug: false
|
| 521 |
+
|
| 522 |
+
# UI settings
|
| 523 |
+
ui:
|
| 524 |
+
title: "CampusGen AI - Indian College Poster Generator"
|
| 525 |
+
description: "Generate stunning posters for college events"
|
| 526 |
+
theme: "soft"
|
| 527 |
+
max_file_size_mb: 10
|
| 528 |
+
allow_flagging: false
|
| 529 |
+
|
| 530 |
+
# Session settings
|
| 531 |
+
session:
|
| 532 |
+
max_concurrent_users: 10
|
| 533 |
+
timeout_minutes: 30
|
| 534 |
+
|
| 535 |
+
# Example prompts
|
| 536 |
+
examples:
|
| 537 |
+
- "IIT Indore Techfest 2026 - Innovation and Robotics"
|
| 538 |
+
- "Cultural Night - Traditional Dance Celebration"
|
| 539 |
+
- "Inter-College Basketball Championship"
|
| 540 |
+
- "Photography Club Recruitment Drive"
|
| 541 |
+
- "Machine Learning Workshop Series"
|
| 542 |
+
|
| 543 |
+
# ============================================================================
|
| 544 |
+
# EVALUATION CONFIGURATION
|
| 545 |
+
# ============================================================================
|
| 546 |
+
evaluation:
|
| 547 |
+
# Metrics to compute
|
| 548 |
+
metrics:
|
| 549 |
+
- "fid" # Frechet Inception Distance
|
| 550 |
+
- "clip_score" # Text-image alignment
|
| 551 |
+
- "inception_score" # Image quality
|
| 552 |
+
- "lpips" # Perceptual similarity
|
| 553 |
+
|
| 554 |
+
# Thresholds
|
| 555 |
+
thresholds:
|
| 556 |
+
fid_max: 50.0 # Lower is better
|
| 557 |
+
clip_score_min: 0.25 # Higher is better
|
| 558 |
+
inception_score_min: 3.0 # Higher is better
|
| 559 |
+
|
| 560 |
+
# Evaluation settings
|
| 561 |
+
num_samples: 100
|
| 562 |
+
batch_size: 10
|
| 563 |
+
save_comparisons: true
|
| 564 |
+
|
| 565 |
+
# ============================================================================
|
| 566 |
+
# OPTIMIZATION FLAGS
|
| 567 |
+
# ============================================================================
|
| 568 |
+
optimization:
|
| 569 |
+
# PyTorch optimizations (tuned for SM120 Blackwell / CUDA 13.x)
|
| 570 |
+
torch:
|
| 571 |
+
backends:
|
| 572 |
+
cudnn:
|
| 573 |
+
enabled: true
|
| 574 |
+
benchmark: true
|
| 575 |
+
deterministic: false
|
| 576 |
+
cuda:
|
| 577 |
+
matmul:
|
| 578 |
+
allow_tf32: true # SM120 TF32 tensor cores
|
| 579 |
+
allow_bf16_reduced_precision_reduction: true # Native bf16 on Blackwell
|
| 580 |
+
allow_fp16_reduced_precision_reduction: true
|
| 581 |
+
|
| 582 |
+
# Memory optimizations
|
| 583 |
+
memory:
|
| 584 |
+
empty_cache_steps: 10
|
| 585 |
+
max_split_size_mb: 1024 # SM120 memory controller handles larger splits
|
| 586 |
+
|
| 587 |
+
# Compile settings — SM120 benefits massively from torch.compile
|
| 588 |
+
compile:
|
| 589 |
+
enabled: true
|
| 590 |
+
mode: "max-autotune" # Best for Blackwell's large SM count
|
| 591 |
+
fullgraph: false
|
| 592 |
+
|
| 593 |
+
# ============================================================================
|
| 594 |
+
# MONITORING AND DEBUGGING
|
| 595 |
+
# ============================================================================
|
| 596 |
+
monitoring:
|
| 597 |
+
# TensorBoard
|
| 598 |
+
tensorboard:
|
| 599 |
+
enabled: true
|
| 600 |
+
log_dir: "logs/tensorboard"
|
| 601 |
+
update_freq: "batch"
|
| 602 |
+
|
| 603 |
+
# Weights & Biases
|
| 604 |
+
wandb:
|
| 605 |
+
enabled: false
|
| 606 |
+
project: "campus-ai"
|
| 607 |
+
entity: null
|
| 608 |
+
tags: ["indian-college", "poster-generation"]
|
| 609 |
+
|
| 610 |
+
# System monitoring
|
| 611 |
+
system:
|
| 612 |
+
log_gpu_stats: true
|
| 613 |
+
log_memory_stats: true
|
| 614 |
+
log_interval_steps: 10
|
| 615 |
+
|
| 616 |
+
# ============================================================================
|
| 617 |
+
# DEPLOYMENT CONFIGURATION
|
| 618 |
+
# ============================================================================
|
| 619 |
+
deployment:
|
| 620 |
+
# Docker settings
|
| 621 |
+
docker:
|
| 622 |
+
base_image: "nvidia/cuda:12.1.0-runtime-ubuntu22.04" # NOTE(review): CUDA 12.1 here vs hardware.gpu.cuda_version "13.1" above — confirm the container CUDA version supports the target GPU/driver
|
| 623 |
+
expose_port: 7860
|
| 624 |
+
|
| 625 |
+
# API settings
|
| 626 |
+
api:
|
| 627 |
+
enabled: false
|
| 628 |
+
host: "0.0.0.0"
|
| 629 |
+
port: 8000
|
| 630 |
+
workers: 4
|
| 631 |
+
timeout: 300
|
| 632 |
+
|
| 633 |
+
# Resource limits
|
| 634 |
+
resources:
|
| 635 |
+
max_memory_gb: 16
|
| 636 |
+
max_gpu_memory_gb: 16
|
| 637 |
+
max_cpu_percent: 80
|
configs/train_sdxl_lora.yaml
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
job: extension
|
| 2 |
+
config:
|
| 3 |
+
name: campus_ai_poster_sdxl
|
| 4 |
+
process:
|
| 5 |
+
- type: sd_trainer
|
| 6 |
+
training_folder: /mnt/e/campus-ai/models/sdxl/checkpoints
|
| 7 |
+
device: cuda:0
|
| 8 |
+
trigger_word: campus_ai_poster
|
| 9 |
+
network:
|
| 10 |
+
type: lora
|
| 11 |
+
linear: 32
|
| 12 |
+
linear_alpha: 16
|
| 13 |
+
dropout: 0.05
|
| 14 |
+
network_kwargs:
|
| 15 |
+
lora_plus_lr_ratio: 1.0
|
| 16 |
+
save:
|
| 17 |
+
dtype: bf16
|
| 18 |
+
save_every: 500
|
| 19 |
+
max_step_saves_to_keep: 5
|
| 20 |
+
datasets:
|
| 21 |
+
- folder_path: /mnt/e/campus-ai/data/train
|
| 22 |
+
caption_ext: txt
|
| 23 |
+
caption_dropout_rate: 0.1
|
| 24 |
+
shuffle_tokens: true
|
| 25 |
+
cache_latents_to_disk: true
|
| 26 |
+
num_workers: 8
|
| 27 |
+
resolution:
|
| 28 |
+
- 1024
|
| 29 |
+
- 1024
|
| 30 |
+
train:
|
| 31 |
+
batch_size: 1
|
| 32 |
+
steps: 12800
|
| 33 |
+
gradient_accumulation_steps: 4
|
| 34 |
+
train_unet: true
|
| 35 |
+
train_text_encoder: false
|
| 36 |
+
disable_sampling: true
|
| 37 |
+
gradient_checkpointing: true
|
| 38 |
+
noise_scheduler: ddpm
|
| 39 |
+
optimizer: adamw8bit
|
| 40 |
+
lr: 0.0001
|
| 41 |
+
lr_warmup_steps: 100
|
| 42 |
+
min_snr_gamma: 5.0
|
| 43 |
+
optimizer_params:
|
| 44 |
+
weight_decay: 0.01
|
| 45 |
+
betas:
|
| 46 |
+
- 0.9
|
| 47 |
+
- 0.999
|
| 48 |
+
ema_config:
|
| 49 |
+
use_ema: true
|
| 50 |
+
ema_decay: 0.999
|
| 51 |
+
dtype: bf16
|
| 52 |
+
lr_scheduler: cosine_with_restarts
|
| 53 |
+
lr_scheduler_params:
|
| 54 |
+
T_0: 4267
|
| 55 |
+
T_mult: 1
|
| 56 |
+
eta_min: 0.00001
|
| 57 |
+
model:
|
| 58 |
+
name_or_path: stabilityai/stable-diffusion-xl-base-1.0
|
| 59 |
+
is_xl: true
|
| 60 |
+
sample:
|
| 61 |
+
sampler: euler_a
|
| 62 |
+
sample_every: 999999
|
| 63 |
+
width: 512
|
| 64 |
+
height: 512
|
| 65 |
+
prompts:
|
| 66 |
+
- campus_ai_poster a vibrant technology fest poster with neon colors and bold
|
| 67 |
+
typography
|
| 68 |
+
- campus_ai_poster a colorful Diwali celebration poster with golden diyas and
|
| 69 |
+
rangoli
|
| 70 |
+
- campus_ai_poster a professional workshop seminar poster with modern minimalist
|
| 71 |
+
design
|
| 72 |
+
- campus_ai_poster a dynamic sports tournament poster with action silhouettes
|
| 73 |
+
neg: ""
|
| 74 |
+
seed: 42
|
| 75 |
+
walk_seed: true
|
| 76 |
+
guidance_scale: 4
|
| 77 |
+
sample_steps: 28
|
| 78 |
+
logging:
|
| 79 |
+
log_every: 10
|
| 80 |
+
use_wandb: false
|
| 81 |
+
verbose: true
|
| 82 |
+
meta:
|
| 83 |
+
name: campus_ai_v1
|
| 84 |
+
version: "1.0"
|
configs/train_sdxl_lora_phase2.yaml
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

# Phase 2 fine-tuning config for ai-toolkit's sd_trainer:
# resumes from the Phase 1 LoRA and refines detail on the full dataset.
job: extension
config:
  name: campus_ai_poster_sdxl_phase2
  process:
    - type: sd_trainer
      training_folder: /mnt/e/campus-ai/models/sdxl/checkpoints
      device: cuda:0
      # Activation token prepended so the LoRA only fires on demand
      trigger_word: campus_ai_poster
      network:
        type: lora
        linear: 32        # LoRA rank
        linear_alpha: 16  # scaling alpha
        dropout: 0.1
        network_kwargs:
          lora_plus_lr_ratio: 1.0
      # ==========================================================
      # PHASE 2: RESUME FROM PHASE 1 WEIGHTS
      # Load the layout knowledge so we only refine details
      # ==========================================================
      pretrained_lora_path: /mnt/e/campus-ai/models/sdxl/checkpoints/campus_ai_poster_sdxl_phase1/campus_ai_poster_sdxl.safetensors
      save:
        dtype: bf16
        save_every: 500
        max_step_saves_to_keep: 5
      # Phase 2 trains on all three splits (100% of the data)
      datasets:
        - folder_path: /mnt/e/campus-ai/data/train
          caption_ext: txt
          caption_dropout_rate: 0.1
          shuffle_tokens: true
          cache_latents_to_disk: true
          num_workers: 8
          resolution: [1024, 1024]
        - folder_path: /mnt/e/campus-ai/data/val
          caption_ext: txt
          caption_dropout_rate: 0.1
          shuffle_tokens: true
          cache_latents_to_disk: true
          num_workers: 8
          resolution: [1024, 1024]
        - folder_path: /mnt/e/campus-ai/data/test
          caption_ext: txt
          caption_dropout_rate: 0.1
          shuffle_tokens: true
          cache_latents_to_disk: true
          num_workers: 8
          resolution: [1024, 1024]
      train:
        batch_size: 1
        # Increased to 20,000 steps since we are now feeding 100% of the dataset
        steps: 20000
        gradient_accumulation_steps: 4
        train_unet: true
        train_text_encoder: false
        disable_sampling: true
        gradient_checkpointing: true
        noise_scheduler: ddpm
        optimizer: adamw8bit
        # ==========================================================
        # PHASE 2: LOWER LEARNING RATE
        # Bake in high-frequency detail without blowing out the layout
        # ==========================================================
        lr: 2.0e-5
        lr_warmup_steps: 100
        min_snr_gamma: 5.0
        optimizer_params:
          weight_decay: 0.01
          betas:
            - 0.9
            - 0.999
        ema_config:
          use_ema: true
          ema_decay: 0.999
        dtype: bf16
        lr_scheduler: cosine
      model:
        name_or_path: stabilityai/stable-diffusion-xl-base-1.0
        is_xl: true
      sample:
        sampler: euler_a
        # Very large interval effectively disables in-training sampling
        sample_every: 999999
        width: 512
        height: 512
        prompts:
          - campus_ai_poster a vibrant technology fest poster with neon colors and bold
            typography
          - campus_ai_poster a colorful Diwali celebration poster with golden diyas and
            rangoli
          - campus_ai_poster a professional workshop seminar poster with modern minimalist
            design
          - campus_ai_poster a dynamic sports tournament poster with action silhouettes
        neg: ""
        seed: 42
        walk_seed: true
        guidance_scale: 4
        sample_steps: 28
      logging:
        log_every: 10
        use_wandb: false
        verbose: true
meta:
  name: campus_ai_v2_perfection
  version: "2.0"
|
configs/train_sdxl_lora_phase3.yaml
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Phase 3 fine-tuning config for ai-toolkit's sd_trainer:
# resumes from the Phase 2 LoRA for a final low-LR pass on the curated tuning set.
job: extension
config:
  name: campus_ai_poster_sdxl_phase3
  process:
    - type: sd_trainer
      training_folder: /mnt/e/campus-ai/models/sdxl/checkpoints
      device: cuda:0
      # Activation token prepended so the LoRA only fires on demand
      trigger_word: campus_ai_poster
      network:
        type: lora
        linear: 32        # LoRA rank
        linear_alpha: 16  # scaling alpha
        dropout: 0.1
        network_kwargs:
          lora_plus_lr_ratio: 1.0
      # ==========================================================
      # PHASE 3: RESUME FROM PHASE 2 WEIGHTS
      # Laser-focused tuning on the 5,500 highly curated strict dataset
      # ==========================================================
      pretrained_lora_path: /mnt/e/campus-ai/models/sdxl/checkpoints/campus_ai_poster_sdxl_phase2/campus_ai_poster_sdxl_phase2.safetensors
      save:
        dtype: bf16
        save_every: 250
        max_step_saves_to_keep: 5
      datasets:
        - folder_path: /mnt/e/campus-ai/data/tuning
          shuffle_tokens: true
          cache_latents_to_disk: true
          num_workers: 8
          resolution: [1024, 1024]
      train:
        batch_size: 1
        # Smaller steps since tuning dataset is highly concentrated (6,448 images)
        steps: 6448
        gradient_accumulation_steps: 4
        train_unet: true
        train_text_encoder: false
        disable_sampling: true
        gradient_checkpointing: true
        noise_scheduler: ddpm
        optimizer: adamw8bit
        # ==========================================================
        # PHASE 3: EXTREMELY LOW LEARNING RATE
        # Lock in final aesthetic consistency from strictly valid posters
        # ==========================================================
        lr: 1.0e-5
        lr_warmup_steps: 50
        min_snr_gamma: 5.0
        optimizer_params:
          weight_decay: 0.01
          betas:
            - 0.9
            - 0.999
        ema_config:
          use_ema: true
          ema_decay: 0.999
        dtype: bf16
        lr_scheduler: cosine
      model:
        name_or_path: stabilityai/stable-diffusion-xl-base-1.0
        is_xl: true
      sample:
        sampler: euler_a
        # Very large interval effectively disables in-training sampling
        sample_every: 999999
        width: 512
        height: 512
        prompts:
          - campus_ai_poster a vibrant technology fest poster with neon colors and bold
            typography
          - campus_ai_poster a colorful Diwali celebration poster with golden diyas and
            rangoli
          - campus_ai_poster a professional workshop seminar poster with modern minimalist
            design
          - campus_ai_poster a dynamic sports tournament poster with action silhouettes
        neg: ""
        seed: 42
        walk_seed: true
        guidance_scale: 4
        sample_steps: 28
      logging:
        log_every: 10
        use_wandb: false
        verbose: true
meta:
  name: campus_ai_v3_tuning
  version: "3.0"
|
deployment/README.md
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: CampusGen AI - Event Poster Generator
|
| 3 |
+
emoji: 🎨
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.12.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: true
|
| 10 |
+
license: mit
|
| 11 |
+
hardware: zero-a10g
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# 🎨 CampusGen AI – Universal Event Poster Generator
|
| 15 |
+
|
| 16 |
+
Generate professional event posters for **any occasion** in 10–15 seconds.
|
| 17 |
+
|
| 18 |
+
## Features
|
| 19 |
+
|
| 20 |
+
- **5 Generation Modes**: Text→Poster, Reference Image (IP-Adapter), Image Transform, Inpainting, HD Upscale
|
| 21 |
+
- **AI-Powered**: Flux.1-dev fine-tuned on 55,000+ diverse poster images via LoRA
|
| 22 |
+
- **55 Categories**: Tech fests, cultural events, festivals (Diwali, Holi, Navratri), sports, workshops, and more
|
| 23 |
+
- **Smart Prompts**: Groq Llama 3.3 70B understands your event semantics and generates optimal prompts
|
| 24 |
+
- **10 Visual Styles**: Vibrant, Elegant, Minimalist, Traditional Indian, Tech-Futuristic, Neon Glow, and more
|
| 25 |
+
- **HD Upscaling**: Real-ESRGAN 4x for print-ready posters
|
| 26 |
+
- **Batch Generation**: Generate up to 4 variants at once
|
| 27 |
+
- **Zero Cost**: Free deployment via ZeroGPU
|
| 28 |
+
|
| 29 |
+
## How to Use
|
| 30 |
+
|
| 31 |
+
### Tab 1: Text → Poster
|
| 32 |
+
|
| 33 |
+
1. Describe your event (e.g., "IIT Indore Techfest 2026 — Robotics & AI Championships")
|
| 34 |
+
2. Select event type and visual style
|
| 35 |
+
3. Click **Generate Poster**
|
| 36 |
+
|
| 37 |
+
### Tab 2: Reference Image
|
| 38 |
+
|
| 39 |
+
1. Upload a poster you like as a reference
|
| 40 |
+
2. Describe your event
|
| 41 |
+
3. Adjust style influence slider
|
| 42 |
+
4. Click **Generate with Reference**
|
| 43 |
+
|
| 44 |
+
### Tab 3: Image Transform
|
| 45 |
+
|
| 46 |
+
1. Upload an existing poster
|
| 47 |
+
2. Describe the transformation (e.g., "Make it neon-themed")
|
| 48 |
+
3. Adjust transformation strength
|
| 49 |
+
4. Click **Transform Poster**
|
| 50 |
+
|
| 51 |
+
### Tab 4: Inpaint / Edit
|
| 52 |
+
|
| 53 |
+
1. Upload a poster
|
| 54 |
+
2. Draw over the area you want to change
|
| 55 |
+
3. Describe what should fill it
|
| 56 |
+
4. Click **Inpaint Region**
|
| 57 |
+
|
| 58 |
+
### Tab 5: HD Upscale
|
| 59 |
+
|
| 60 |
+
1. Upload any image
|
| 61 |
+
2. Select 2x or 4x scale
|
| 62 |
+
3. Click **Upscale**
|
| 63 |
+
|
| 64 |
+
## Technical Details
|
| 65 |
+
|
| 66 |
+
| Component | Details |
|
| 67 |
+
|-----------|---------|
|
| 68 |
+
| Base Model | Flux.1-dev (12B params) |
|
| 69 |
+
| Fine-tuning | LoRA (rank 32, bf16) |
|
| 70 |
+
| Dataset | 55,000+ curated event posters, 55 categories |
|
| 71 |
+
| LLM | Llama 3.3 70B via Groq |
|
| 72 |
+
| IP-Adapter | Reference image style extraction |
|
| 73 |
+
| Upscaler | Real-ESRGAN 4x |
|
| 74 |
+
| Hardware | ZeroGPU (shared A100) |
|
| 75 |
+
|
| 76 |
+
## Pipeline (GPU-Accelerated)
|
| 77 |
+
|
| 78 |
+
```text
|
| 79 |
+
Scraping (CPU) → Quality Filter (GPU) → Captioning (GPU) → Split → Train LoRA (GPU) → Deploy
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
## Author
|
| 83 |
+
|
| 84 |
+
Built with ❤️ by M Runeet Kumar
|
deployment/app.py
ADDED
|
@@ -0,0 +1,663 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
CampusGen AI – Full-Feature Gradio Application
|
| 4 |
+
Multi-tab poster generation platform for Hugging Face Spaces.
|
| 5 |
+
|
| 6 |
+
Tabs:
|
| 7 |
+
1. Text → Poster (Flux + LoRA + Groq LLM)
|
| 8 |
+
2. Reference Image (IP-Adapter + LoRA)
|
| 9 |
+
3. Image Transform (Img2Img pipeline)
|
| 10 |
+
4. Inpainting / Edit (Mask-based regeneration)
|
| 11 |
+
5. HD Upscale (Real-ESRGAN 4x)
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import os
|
| 15 |
+
import time
|
| 16 |
+
import logging
|
| 17 |
+
from typing import Optional
|
| 18 |
+
|
| 19 |
+
import torch
|
| 20 |
+
import gradio as gr
|
| 21 |
+
|
| 22 |
+
# HF Spaces ZeroGPU decorator (works even if package isn't installed)
|
| 23 |
+
try:
    import spaces
    HAS_SPACES = True
except ImportError:
    HAS_SPACES = False

    class _FakeSpaces:
        """No-op stand-in for the HF `spaces` package outside HF Spaces."""

        @staticmethod
        def GPU(func=None, duration=60):
            """Mimic `spaces.GPU`.

            Supports both usages of the real decorator:
            `@spaces.GPU` (bare) and `@spaces.GPU(duration=...)` (called).
            The original stub only handled the called form and would
            silently break a bare decoration.
            """
            if callable(func):
                # Bare form: @spaces.GPU — return the function unchanged.
                return func

            def decorator(fn):
                return fn
            return decorator

    spaces = _FakeSpaces()
|
| 35 |
+
|
| 36 |
+
from pipelines import get_pipeline_manager, flush_vram
|
| 37 |
+
from prompt_engine import (
|
| 38 |
+
build_text2img_prompt,
|
| 39 |
+
build_img2img_prompt,
|
| 40 |
+
build_inpaint_prompt,
|
| 41 |
+
STYLE_MAP,
|
| 42 |
+
EVENT_TYPE_HINTS,
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
+
logging.basicConfig(level=logging.INFO)
|
| 46 |
+
logger = logging.getLogger(__name__)
|
| 47 |
+
|
| 48 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 49 |
+
# Dropdowns
|
| 50 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 51 |
+
# Dropdown choices are derived from the prompt engine so UI options and
# prompt templates stay in sync with a single source of truth.
EVENT_TYPES = list(EVENT_TYPE_HINTS.keys())
STYLES = list(STYLE_MAP.keys())

# UI label → (width, height) in pixels for the resolution dropdown.
RESOLUTION_PRESETS = {
    "Square (1024×1024)": (1024, 1024),
    "Portrait (768×1152)": (768, 1152),
    "Portrait Tall (768×1344)": (768, 1344),
    "Landscape (1152×768)": (1152, 768),
    "Landscape Wide (1344×768)": (1344, 768),
    "Instagram Story (768×1365)": (768, 1365),
    "A4 Poster (768×1086)": (768, 1086),
}
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 66 |
+
# GENERATION FUNCTIONS
|
| 67 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 68 |
+
|
| 69 |
+
@spaces.GPU(duration=90)
def generate_text2img(
    event_description: str,
    event_type: str,
    style: str,
    resolution: str,
    num_variants: int,
    num_steps: int,
    guidance_scale: float,
    lora_strength: float,
    enable_upscale: bool,
    seed: int,
):
    """Tab 1: Text-to-Poster generation.

    Builds a prompt from the event description via the prompt engine and
    renders `num_variants` posters with the text2img pipeline.

    Returns:
        (images, info): list of generated images and a markdown info string.

    Raises:
        gr.Error: if the event description is empty/whitespace.
    """
    if not event_description.strip():
        raise gr.Error("Please enter an event description!")

    manager = get_pipeline_manager()
    pipe = manager.get_text2img()

    # Build prompt via Groq LLM
    prompt = build_text2img_prompt(event_description, event_type, style)
    logger.info(f"[Text2Img] Prompt: {prompt[:120]}...")

    # Resolution
    width, height = RESOLUTION_PRESETS.get(resolution, (1024, 1024))

    # Seed: -1 means "random" — derive one from the clock so it can be reported
    if seed == -1:
        seed = int(time.time()) % (2**32)

    # LoRA strength
    if manager.is_lora_loaded:
        pipe.fuse_lora(lora_scale=lora_strength)

    # One generator reused across variants: each variant consumes fresh noise
    # while the whole batch remains reproducible from `seed`.
    images = []
    generator = torch.Generator("cpu").manual_seed(seed)
    start = time.time()

    try:
        for _ in range(num_variants):
            result = pipe(
                prompt=prompt,
                height=height,
                width=width,
                num_inference_steps=num_steps,
                guidance_scale=guidance_scale,
                generator=generator,
            )
            img = result.images[0]

            if enable_upscale:
                img = manager.upscale_image(img, scale=2)

            images.append(img)
    finally:
        # BUGFIX: always unfuse, even if generation or upscaling raises —
        # otherwise the fused LoRA scale leaks into subsequent requests
        # that share this cached pipeline.
        if manager.is_lora_loaded:
            pipe.unfuse_lora()

    elapsed = time.time() - start

    info = (
        f"**Generated {num_variants} poster(s) in {elapsed:.1f}s** | "
        f"Seed: {seed} | {width}×{height} | Steps: {num_steps}\n\n"
        f"**Prompt:**\n{prompt}"
    )

    return images, info
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
@spaces.GPU(duration=90)
def generate_with_reference(
    event_description: str,
    reference_image,
    style: str,
    style_strength: float,
    resolution: str,
    num_steps: int,
    guidance_scale: float,
    enable_upscale: bool,
    seed: int,
):
    """Tab 2: Reference image + text → poster (IP-Adapter).

    Uses the uploaded reference to steer the style of the generated poster;
    `style_strength` controls how strongly the adapter influences the result.
    """
    if reference_image is None:
        raise gr.Error("Please upload a reference image!")
    if not event_description.strip():
        raise gr.Error("Please enter an event description!")

    from PIL import Image

    # Attach the IP-Adapter to the shared text2img pipeline and set its scale.
    manager = get_pipeline_manager()
    pipe = manager.load_ip_adapter(manager.get_text2img())
    manager.set_ip_adapter_scale(pipe, scale=style_strength)

    prompt = build_text2img_prompt(event_description, "Other", style)
    width, height = RESOLUTION_PRESETS.get(resolution, (1024, 1024))

    # -1 means "randomize": derive a reportable seed from the clock.
    if seed == -1:
        seed = int(time.time()) % (2**32)

    rng = torch.Generator("cpu").manual_seed(seed)
    start = time.time()

    # Downscale the reference to 224×224 before handing it to the adapter.
    ref_img = Image.fromarray(reference_image).convert("RGB").resize((224, 224))

    output = pipe(
        prompt=prompt,
        ip_adapter_image=ref_img,
        height=height,
        width=width,
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        generator=rng,
    )
    img = output.images[0]
    elapsed = time.time() - start

    if enable_upscale:
        img = manager.upscale_image(img, scale=2)

    info = (
        f"**Generated in {elapsed:.1f}s** | Seed: {seed} | "
        f"Style strength: {style_strength}\n\n"
        f"**Prompt:**\n{prompt}"
    )

    return img, info
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
@spaces.GPU(duration=90)
def generate_img2img(
    input_image,
    transform_description: str,
    style: str,
    denoising_strength: float,
    num_steps: int,
    guidance_scale: float,
    enable_upscale: bool,
    seed: int,
):
    """Tab 3: Image-to-image transformation.

    Re-renders the uploaded image according to `transform_description`;
    `denoising_strength` controls how far the result may drift from the input.
    """
    if input_image is None:
        raise gr.Error("Please upload an image to transform!")

    from PIL import Image

    manager = get_pipeline_manager()
    pipe = manager.get_img2img()

    prompt = build_img2img_prompt(transform_description, style)

    # -1 means "randomize": derive a reportable seed from the clock.
    if seed == -1:
        seed = int(time.time()) % (2**32)

    rng = torch.Generator("cpu").manual_seed(seed)
    base = Image.fromarray(input_image).convert("RGB").resize((1024, 1024))

    start = time.time()
    output = pipe(
        prompt=prompt,
        image=base,
        strength=denoising_strength,
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        generator=rng,
    )
    img = output.images[0]
    elapsed = time.time() - start

    if enable_upscale:
        img = manager.upscale_image(img, scale=2)

    info = (
        f"**Transformed in {elapsed:.1f}s** | Seed: {seed} | "
        f"Denoise: {denoising_strength}\n\n"
        f"**Prompt:**\n{prompt}"
    )

    return img, info
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
@spaces.GPU(duration=90)
def generate_inpaint(
    input_data: dict,
    fill_description: str,
    num_steps: int,
    guidance_scale: float,
    seed: int,
):
    """Tab 4: Inpainting – regenerate masked region.

    `input_data` is the gr.ImageEditor value: a dict with a "background"
    array plus a list of painted "layers" that define the mask.

    Raises:
        gr.Error: if no image was uploaded or no mask was drawn.
    """
    if input_data is None:
        raise gr.Error("Please upload an image and draw a mask!")

    from PIL import Image
    import numpy as np

    manager = get_pipeline_manager()
    pipe = manager.get_inpaint()

    prompt = build_inpaint_prompt(fill_description)

    if seed == -1:
        seed = int(time.time()) % (2**32)

    generator = torch.Generator("cpu").manual_seed(seed)

    # Extract image and mask from ImageEditor output
    source_image = Image.fromarray(input_data["background"]).convert("RGB").resize((1024, 1024))

    # Build mask from the first painted layer
    if "layers" in input_data and len(input_data["layers"]) > 0:
        layer = np.asarray(input_data["layers"][0])
        # BUGFIX: ImageEditor layers are RGBA where painted pixels have
        # alpha > 0. Converting RGBA→"L" uses luminance, so drawing with a
        # dark brush produced an all-black (empty) mask. Use the alpha
        # channel when present so any brush color marks the region.
        if layer.ndim == 3 and layer.shape[-1] == 4:
            mask = Image.fromarray(layer[..., 3]).resize((1024, 1024))
        else:
            mask = Image.fromarray(layer).convert("L").resize((1024, 1024))
        # Binarize mask
        mask = mask.point(lambda x: 255 if x > 10 else 0)
    else:
        raise gr.Error("Please draw on the image to create a mask!")

    start = time.time()
    result = pipe(
        prompt=prompt,
        image=source_image,
        mask_image=mask,
        height=1024,
        width=1024,
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        generator=generator,
    )
    img = result.images[0]
    elapsed = time.time() - start

    info = (
        f"**Inpainted in {elapsed:.1f}s** | Seed: {seed}\n\n"
        f"**Prompt:**\n{prompt}"
    )

    return img, info
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
def upscale_only(input_image, scale_factor: int):
    """Tab 5: Standalone HD upscaling via the pipeline manager's upscaler."""
    if input_image is None:
        raise gr.Error("Please upload an image to upscale!")

    from PIL import Image

    manager = get_pipeline_manager()
    source = Image.fromarray(input_image).convert("RGB")
    before = f"{source.width}×{source.height}"

    t0 = time.time()
    upscaled = manager.upscale_image(source, scale=scale_factor)
    took = time.time() - t0

    after = f"{upscaled.width}×{upscaled.height}"
    return upscaled, f"**Upscaled in {took:.1f}s** | {before} → {after}"
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 334 |
+
# GRADIO UI
|
| 335 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 336 |
+
|
| 337 |
+
css = """
|
| 338 |
+
.gradio-container {
|
| 339 |
+
max-width: 1400px !important;
|
| 340 |
+
margin: auto;
|
| 341 |
+
}
|
| 342 |
+
.title-text {
|
| 343 |
+
text-align: center;
|
| 344 |
+
font-size: 2.5em;
|
| 345 |
+
font-weight: 800;
|
| 346 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 50%, #f093fb 100%);
|
| 347 |
+
-webkit-background-clip: text;
|
| 348 |
+
-webkit-text-fill-color: transparent;
|
| 349 |
+
margin-bottom: 0.2em;
|
| 350 |
+
letter-spacing: -0.02em;
|
| 351 |
+
}
|
| 352 |
+
.subtitle-text {
|
| 353 |
+
text-align: center;
|
| 354 |
+
color: #888;
|
| 355 |
+
font-size: 1.15em;
|
| 356 |
+
margin-bottom: 1.5em;
|
| 357 |
+
font-weight: 300;
|
| 358 |
+
}
|
| 359 |
+
.tab-nav button {
|
| 360 |
+
font-size: 1.05em !important;
|
| 361 |
+
font-weight: 600 !important;
|
| 362 |
+
}
|
| 363 |
+
.generate-btn {
|
| 364 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
| 365 |
+
border: none !important;
|
| 366 |
+
font-size: 1.1em !important;
|
| 367 |
+
}
|
| 368 |
+
.footer-text {
|
| 369 |
+
text-align: center;
|
| 370 |
+
color: #999;
|
| 371 |
+
font-size: 0.9em;
|
| 372 |
+
margin-top: 1em;
|
| 373 |
+
padding: 1em;
|
| 374 |
+
border-top: 1px solid #333;
|
| 375 |
+
}
|
| 376 |
+
"""
|
| 377 |
+
|
| 378 |
+
EXAMPLES = [
|
| 379 |
+
["IIT Indore Techfest 2026 — Robotics & AI Championships", "Technical Fest", "Tech-Futuristic"],
|
| 380 |
+
["Diwali Mela 2026 — Spark of Joy", "Diwali Celebration", "Traditional Indian"],
|
| 381 |
+
["Inter-College Basketball Championship", "Sports Tournament", "Vibrant and Energetic"],
|
| 382 |
+
["Photography Club Portfolio Night", "Club Recruitment", "Dark Premium"],
|
| 383 |
+
["ML/AI Workshop Series — From Zero to GPT", "Workshop / Seminar", "Gradient Modern"],
|
| 384 |
+
["Classical Kathak Dance Night", "Cultural Event", "Elegant and Professional"],
|
| 385 |
+
["Holi Hai! Campus Color Run", "Holi Festival", "Artistic and Creative"],
|
| 386 |
+
["Navratri Garba Night 2026", "Navratri / Garba", "Traditional Indian"],
|
| 387 |
+
["End-of-Year Farewell Party", "Freshers / Farewell", "Neon Glow"],
|
| 388 |
+
["Blood Donation Camp — Save Lives", "Blood Donation", "Modern Minimalist"],
|
| 389 |
+
]
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
def build_app() -> gr.Blocks:
    """Assemble and return the full Gradio UI.

    Five tabs, each wired to a generation callback defined elsewhere in
    this module:
      1. Text → Poster      → generate_text2img
      2. Reference Image    → generate_with_reference (IP-Adapter)
      3. Image Transform    → generate_img2img
      4. Inpaint / Edit     → generate_inpaint
      5. HD Upscale         → upscale_only

    Relies on module-level globals: ``css``, ``EVENT_TYPES``, ``STYLES``,
    ``RESOLUTION_PRESETS``, ``EXAMPLES``.
    """
    with gr.Blocks(css=css, theme=gr.themes.Soft(), title="CampusGen AI") as demo:

        # ── Header ───────────────────────────────────────────────────
        gr.HTML(
            '<div class="title-text">🎨 CampusGen AI</div>'
            '<div class="subtitle-text">'
            "Generate stunning event posters in seconds — "
            "Text · Reference Image · Transform · Inpaint · Upscale"
            "</div>"
        )

        with gr.Tabs() as tabs:

            # ═══════════════════════════════════════════════════════════
            # TAB 1: Text → Poster
            # ═══════════════════════════════════════════════════════════
            with gr.Tab("✍️ Text → Poster", id="text2img"):
                with gr.Row():
                    with gr.Column(scale=1):
                        t2i_event = gr.Textbox(
                            label="📝 Describe Your Event",
                            placeholder="e.g., 'Annual tech fest with AI and robotics competitions at IIT Indore, March 2026'",
                            lines=3,
                        )
                        t2i_type = gr.Dropdown(
                            EVENT_TYPES, value="Technical Fest",
                            label="🏷️ Event Type",
                        )
                        t2i_style = gr.Dropdown(
                            STYLES, value="Vibrant and Energetic",
                            label="🎨 Visual Style",
                        )
                        t2i_resolution = gr.Dropdown(
                            list(RESOLUTION_PRESETS.keys()),
                            value="Portrait (768×1152)",
                            label="📐 Resolution",
                        )
                        t2i_variants = gr.Slider(
                            1, 4, value=1, step=1,
                            label="🔢 Number of Variants",
                        )

                        with gr.Accordion("⚙️ Advanced", open=False):
                            t2i_steps = gr.Slider(10, 50, value=28, step=1, label="Inference Steps")
                            t2i_cfg = gr.Slider(1.0, 10.0, value=3.5, step=0.5, label="Guidance Scale")
                            t2i_lora = gr.Slider(0.0, 1.5, value=0.85, step=0.05, label="LoRA Strength")
                            t2i_upscale = gr.Checkbox(label="🔍 HD Upscale (2x)", value=False)
                            t2i_seed = gr.Number(value=-1, label="Seed (-1 = random)")

                        t2i_btn = gr.Button("🚀 Generate Poster", variant="primary", size="lg", elem_classes=["generate-btn"])

                    with gr.Column(scale=1):
                        t2i_gallery = gr.Gallery(
                            label="Generated Posters", columns=2,
                            height=600, object_fit="contain",
                        )
                        t2i_info = gr.Markdown(label="Generation Info")

                gr.Examples(
                    examples=EXAMPLES,
                    inputs=[t2i_event, t2i_type, t2i_style],
                    label="💡 Try These Examples",
                )

                t2i_btn.click(
                    fn=generate_text2img,
                    inputs=[
                        t2i_event, t2i_type, t2i_style, t2i_resolution,
                        t2i_variants, t2i_steps, t2i_cfg, t2i_lora,
                        t2i_upscale, t2i_seed,
                    ],
                    outputs=[t2i_gallery, t2i_info],
                )

            # ═══════════════════════════════════════════════════════════
            # TAB 2: Reference Image
            # ═══════════════════════════════════════════════════════════
            with gr.Tab("🖼️ Reference Image", id="reference"):
                gr.Markdown(
                    "Upload a poster you like → the AI will extract its **visual style** "
                    "and blend it with your event description using IP-Adapter."
                )
                with gr.Row():
                    with gr.Column(scale=1):
                        ref_image = gr.Image(
                            label="📎 Upload Reference Poster",
                            type="numpy", height=300,
                        )
                        ref_event = gr.Textbox(
                            label="📝 Describe Your Event",
                            placeholder="e.g., 'Annual cultural night with dance performances'",
                            lines=2,
                        )
                        ref_style = gr.Dropdown(
                            STYLES, value="Vibrant and Energetic",
                            label="🎨 Base Style",
                        )
                        ref_strength = gr.Slider(
                            0.0, 1.0, value=0.6, step=0.05,
                            label="🎚️ Reference Influence (0=ignore, 1=copy)",
                        )
                        ref_resolution = gr.Dropdown(
                            list(RESOLUTION_PRESETS.keys()),
                            value="Portrait (768×1152)",
                            label="📐 Resolution",
                        )

                        with gr.Accordion("⚙️ Advanced", open=False):
                            ref_steps = gr.Slider(10, 50, value=28, step=1, label="Steps")
                            ref_cfg = gr.Slider(1.0, 10.0, value=3.5, step=0.5, label="Guidance")
                            ref_upscale = gr.Checkbox(label="🔍 HD Upscale (2x)", value=False)
                            ref_seed = gr.Number(value=-1, label="Seed")

                        ref_btn = gr.Button("🚀 Generate with Reference", variant="primary", size="lg", elem_classes=["generate-btn"])

                    with gr.Column(scale=1):
                        ref_output = gr.Image(label="Generated Poster", type="pil", height=600)
                        ref_info = gr.Markdown()

                ref_btn.click(
                    fn=generate_with_reference,
                    inputs=[
                        ref_event, ref_image, ref_style, ref_strength,
                        ref_resolution, ref_steps, ref_cfg, ref_upscale, ref_seed,
                    ],
                    outputs=[ref_output, ref_info],
                )

            # ═══════════════════════════════════════════════════════════
            # TAB 3: Image Transform
            # ═══════════════════════════════════════════════════════════
            with gr.Tab("🔄 Image Transform", id="img2img"):
                gr.Markdown(
                    "Upload an existing poster → describe how you want it **transformed**. "
                    "Lower denoising = subtle changes, higher = dramatic restyle."
                )
                with gr.Row():
                    with gr.Column(scale=1):
                        i2i_image = gr.Image(
                            label="📎 Upload Poster to Transform",
                            type="numpy", height=300,
                        )
                        i2i_desc = gr.Textbox(
                            label="🔄 Describe the Transformation",
                            placeholder="e.g., 'Make it neon-themed with darker background and glow effects'",
                            lines=2,
                        )
                        i2i_style = gr.Dropdown(
                            STYLES, value="Tech-Futuristic",
                            label="🎨 Target Style",
                        )
                        i2i_denoise = gr.Slider(
                            0.1, 1.0, value=0.65, step=0.05,
                            label="🎚️ Transformation Strength (0.1=subtle, 1.0=complete restyle)",
                        )

                        with gr.Accordion("⚙️ Advanced", open=False):
                            i2i_steps = gr.Slider(10, 50, value=28, step=1, label="Steps")
                            i2i_cfg = gr.Slider(1.0, 10.0, value=3.5, step=0.5, label="Guidance")
                            i2i_upscale = gr.Checkbox(label="🔍 HD Upscale (2x)", value=False)
                            i2i_seed = gr.Number(value=-1, label="Seed")

                        i2i_btn = gr.Button("🔄 Transform Poster", variant="primary", size="lg", elem_classes=["generate-btn"])

                    with gr.Column(scale=1):
                        i2i_output = gr.Image(label="Transformed Poster", type="pil", height=600)
                        i2i_info = gr.Markdown()

                i2i_btn.click(
                    fn=generate_img2img,
                    inputs=[
                        i2i_image, i2i_desc, i2i_style, i2i_denoise,
                        i2i_steps, i2i_cfg, i2i_upscale, i2i_seed,
                    ],
                    outputs=[i2i_output, i2i_info],
                )

            # ═══════════════════════════════════════════════════════════
            # TAB 4: Inpainting
            # ═══════════════════════════════════════════════════════════
            with gr.Tab("🖌️ Inpaint / Edit", id="inpaint"):
                gr.Markdown(
                    "Upload a poster → **draw over the area** you want to change → "
                    "describe what should replace it. The rest of the poster stays intact."
                )
                with gr.Row():
                    with gr.Column(scale=1):
                        # ImageEditor supplies both the base image and the drawn
                        # mask layers to generate_inpaint as one composite value.
                        inp_editor = gr.ImageEditor(
                            label="🖌️ Draw Mask on Poster",
                            type="numpy",
                            height=400,
                            brush=gr.Brush(
                                default_size=30,
                                colors=["#FFFFFF"],
                                color_mode="fixed",
                            ),
                            eraser=gr.Eraser(default_size=20),
                            layers=True,
                        )
                        inp_desc = gr.Textbox(
                            label="📝 What Should Fill the Masked Area?",
                            placeholder="e.g., 'A golden trophy with confetti'",
                            lines=2,
                        )

                        with gr.Accordion("⚙️ Advanced", open=False):
                            inp_steps = gr.Slider(10, 50, value=28, step=1, label="Steps")
                            inp_cfg = gr.Slider(1.0, 10.0, value=3.5, step=0.5, label="Guidance")
                            inp_seed = gr.Number(value=-1, label="Seed")

                        inp_btn = gr.Button("🖌️ Inpaint Region", variant="primary", size="lg", elem_classes=["generate-btn"])

                    with gr.Column(scale=1):
                        inp_output = gr.Image(label="Inpainted Poster", type="pil", height=600)
                        inp_info = gr.Markdown()

                inp_btn.click(
                    fn=generate_inpaint,
                    inputs=[inp_editor, inp_desc, inp_steps, inp_cfg, inp_seed],
                    outputs=[inp_output, inp_info],
                )

            # ═══════════════════════════════════════════════════════════
            # TAB 5: HD Upscale
            # ═══════════════════════════════════════════════════════════
            with gr.Tab("🔍 HD Upscale", id="upscale"):
                gr.Markdown(
                    "Upload any image → get a **4x upscaled** HD version using Real-ESRGAN. "
                    "Great for making generated posters print-ready."
                )
                with gr.Row():
                    with gr.Column(scale=1):
                        up_image = gr.Image(
                            label="📎 Upload Image",
                            type="numpy", height=300,
                        )
                        up_scale = gr.Radio(
                            [2, 4], value=4, label="🔍 Scale Factor",
                        )
                        up_btn = gr.Button("🔍 Upscale", variant="primary", size="lg", elem_classes=["generate-btn"])

                    with gr.Column(scale=1):
                        up_output = gr.Image(label="Upscaled Image", type="pil", height=600)
                        up_info = gr.Markdown()

                up_btn.click(
                    fn=upscale_only,
                    inputs=[up_image, up_scale],
                    outputs=[up_output, up_info],
                )

        # ── Footer ───────────────────────────────────────────────────
        gr.HTML(
            '<div class="footer-text">'
            "<strong>CampusGen AI</strong> — "
            "Fine-tuned on 71,000+ event poster images across 57 subcategories | "
            "Flux.1-dev + LoRA + IP-Adapter + Real-ESRGAN | "
            "Groq Llama 3.3 70B for smart prompts<br>"
            "Built with ❤️ for the Indian campus community"
            "</div>"
        )

    return demo
|
| 656 |
+
|
| 657 |
+
|
| 658 |
+
# ─────────────────────────────────────────────────────────────────────────────
# Launch
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    demo = build_app()
    # Bind to all interfaces on 7860 — the Hugging Face Spaces default port.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
deployment/pipelines.py
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
CampusGen AI – Pipeline Manager
Centralized lazy-loading of all generation pipelines.
Shares base model + LoRA across text2img, img2img, inpainting.
Manages VRAM via CPU offloading for 16GB GPUs / HF ZeroGPU.
"""

import os
import gc
import logging
from typing import Optional
from pathlib import Path

import torch
import numpy as np
from PIL import Image

# ─── SM120 (Blackwell) CUDA optimizations ───────────────────────────────────
# TF32 matmuls trade a little precision for substantial throughput on
# Ampere-and-newer GPUs; only enabled when CUDA is actually present.
if torch.cuda.is_available():
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

logger = logging.getLogger(__name__)

# ─────────────────────────────────────────────────────────────────────────────
# Configuration
# ─────────────────────────────────────────────────────────────────────────────
# HF_USERNAME must be set in the environment; the placeholder default will
# produce a non-existent repo and the LoRA load will fall back to base Flux.
HF_USERNAME = os.environ.get("HF_USERNAME", "YOUR_USERNAME")
LORA_REPO = f"{HF_USERNAME}/campus-ai-poster-lora"
LORA_FILENAME = "campus_ai_poster_lora.safetensors"
BASE_MODEL = "black-forest-labs/FLUX.1-dev"

# IP-Adapter for Flux
IP_ADAPTER_REPO = "h94/IP-Adapter"
# NOTE(review): these are SDXL IP-Adapter weights, not Flux-native ones —
# loading them onto a Flux pipeline is expected to fail and be caught in
# PipelineManager.load_ip_adapter, disabling the reference-image feature.
# Confirm whether a Flux-compatible adapter repo should be used instead.
IP_ADAPTER_SUBFOLDER = "sdxl_models"
IMAGE_ENCODER_REPO = "openai/clip-vit-large-patch14"

# Real-ESRGAN upscaler
ESRGAN_MODEL_NAME = "RealESRGAN_x4plus"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def flush_vram():
    """Aggressively release GPU memory.

    Runs a garbage-collection pass first so that Python-side references to
    tensors are dropped, then empties the CUDA caching allocator and waits
    for all in-flight kernels to finish. A no-op beyond gc on CPU-only hosts.
    """
    gc.collect()
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.synchronize()
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class PipelineManager:
    """
    Manages all generation pipelines with shared base model.
    Only ONE pipeline mode is active at a time to fit in 16GB VRAM.

    Switching modes (text2img / img2img / inpaint) drops the previous
    pipeline and reloads Flux from scratch, so the first call after a
    switch is slow. The Real-ESRGAN upscaler is loaded independently
    and kept alive across mode switches.
    """

    def __init__(self):
        # Lazily-created pipeline handles; at most one is non-None at a time.
        self._text2img = None
        self._img2img = None
        self._inpaint = None
        self._ip_adapter_loaded = False
        # Either a RealESRGANer instance or the string sentinel
        # "pillow_fallback" (see get_upscaler / upscale_image).
        self._upscaler = None
        # One of "text2img" | "img2img" | "inpaint" | None.
        self._active_mode: Optional[str] = None
        self._lora_loaded = False

    # ── Text-to-Image ────────────────────────────────────────────────────

    def get_text2img(self):
        """Load or return text-to-image pipeline."""
        if self._active_mode == "text2img" and self._text2img is not None:
            return self._text2img

        # Free whatever mode was active before loading a new pipeline.
        self._unload_all()

        # Imported lazily so module import stays cheap.
        from diffusers import FluxPipeline

        logger.info("Loading Flux.1-dev text-to-image pipeline...")
        self._text2img = FluxPipeline.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.bfloat16,
        )
        # Keeps only the active sub-model on GPU; required for 16GB cards.
        self._text2img.enable_model_cpu_offload()
        self._load_lora(self._text2img)

        # SM120: compile transformer for faster inference
        # NOTE(review): best-effort only — torch.compile combined with CPU
        # offload may fail or fall back; any error is deliberately swallowed.
        try:
            self._text2img.transformer = torch.compile(
                self._text2img.transformer, mode="max-autotune"
            )
        except Exception:
            pass

        self._active_mode = "text2img"
        logger.info("Text-to-image pipeline ready.")
        return self._text2img

    # ── Image-to-Image ───────────────────────────────────────────────────

    def get_img2img(self):
        """Load or return img2img pipeline."""
        if self._active_mode == "img2img" and self._img2img is not None:
            return self._img2img

        self._unload_all()

        from diffusers import FluxImg2ImgPipeline

        logger.info("Loading Flux.1-dev img2img pipeline...")
        self._img2img = FluxImg2ImgPipeline.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.bfloat16,
        )
        self._img2img.enable_model_cpu_offload()
        self._load_lora(self._img2img)

        # Best-effort compile; see note in get_text2img.
        try:
            self._img2img.transformer = torch.compile(
                self._img2img.transformer, mode="max-autotune"
            )
        except Exception:
            pass

        self._active_mode = "img2img"
        logger.info("Img2img pipeline ready.")
        return self._img2img

    # ── Inpainting ───────────────────────────────────────────────────────

    def get_inpaint(self):
        """Load or return inpainting pipeline."""
        if self._active_mode == "inpaint" and self._inpaint is not None:
            return self._inpaint

        self._unload_all()

        from diffusers import FluxInpaintPipeline

        logger.info("Loading Flux.1-dev inpainting pipeline...")
        self._inpaint = FluxInpaintPipeline.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.bfloat16,
        )
        self._inpaint.enable_model_cpu_offload()
        self._load_lora(self._inpaint)

        # Best-effort compile; see note in get_text2img.
        try:
            self._inpaint.transformer = torch.compile(
                self._inpaint.transformer, mode="max-autotune"
            )
        except Exception:
            pass

        self._active_mode = "inpaint"
        logger.info("Inpainting pipeline ready.")
        return self._inpaint

    # ── IP-Adapter (style from reference image) ──────────────────────────

    def load_ip_adapter(self, pipe):
        """
        Attach IP-Adapter to the current pipeline for reference-image input.
        Uses CLIP image encoder to extract style features.

        Failure is non-fatal: the reference-image feature is simply
        disabled and the pipeline is returned unchanged.

        NOTE(review): IP_ADAPTER_SUBFOLDER points at SDXL weights — these
        are unlikely to load onto a Flux pipeline; confirm a Flux-specific
        adapter is intended.
        """
        if self._ip_adapter_loaded:
            return pipe

        try:
            logger.info("Loading IP-Adapter for reference image support...")
            pipe.load_ip_adapter(
                IP_ADAPTER_REPO,
                subfolder=IP_ADAPTER_SUBFOLDER,
                weight_name="ip-adapter-plus_sdxl_vit-h.safetensors",
            )
            self._ip_adapter_loaded = True
            logger.info("IP-Adapter loaded successfully.")
        except Exception as e:
            logger.warning(f"Could not load IP-Adapter: {e}")
            logger.warning("Reference image feature will be disabled.")

        return pipe

    def set_ip_adapter_scale(self, pipe, scale: float = 0.6):
        """Set the influence strength of the reference image."""
        # Silently a no-op when the adapter failed to load above.
        if self._ip_adapter_loaded:
            pipe.set_ip_adapter_scale(scale)

    # ── Real-ESRGAN Upscaler ─────────────────────────────────────────────

    def get_upscaler(self):
        """Load and return the Real-ESRGAN upscaler model.

        Returns either a RealESRGANer instance or the string sentinel
        "pillow_fallback" when realesrgan/basicsr are unavailable.
        """
        if self._upscaler is not None:
            return self._upscaler

        try:
            from realesrgan import RealESRGANer
            from basicsr.archs.rrdbnet_arch import RRDBNet

            logger.info("Loading Real-ESRGAN x4 upscaler...")

            # Standard RealESRGAN_x4plus architecture parameters.
            model = RRDBNet(
                num_in_ch=3, num_out_ch=3, num_feat=64,
                num_block=23, num_grow_ch=32, scale=4,
            )

            self._upscaler = RealESRGANer(
                scale=4,
                model_path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
                model=model,
                tile=512,  # Tile size for memory-efficient upscaling
                tile_pad=10,
                pre_pad=0,
                half=True,  # FP16 for speed
            )
            logger.info("Real-ESRGAN upscaler ready.")

        except ImportError:
            logger.warning(
                "Real-ESRGAN not installed. Using Pillow LANCZOS fallback."
            )
            self._upscaler = "pillow_fallback"

        except Exception as e:
            logger.warning(f"Could not load Real-ESRGAN: {e}. Using fallback.")
            self._upscaler = "pillow_fallback"

        return self._upscaler

    def upscale_image(self, image: Image.Image, scale: int = 4) -> Image.Image:
        """
        Upscale an image using Real-ESRGAN (or Pillow fallback).
        Input: PIL Image
        Output: PIL Image (upscaled)

        `scale` is passed to RealESRGANer as `outscale`, so 2x output is
        produced from the x4 model by downscaling its result.
        """
        upscaler = self.get_upscaler()

        if upscaler == "pillow_fallback":
            # Simple Pillow resize as fallback
            new_size = (image.width * scale, image.height * scale)
            return image.resize(new_size, Image.LANCZOS)

        # Real-ESRGAN
        img_np = np.array(image)
        # Real-ESRGAN expects BGR
        import cv2
        img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)

        output, _ = upscaler.enhance(img_bgr, outscale=scale)
        output_rgb = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)

        return Image.fromarray(output_rgb)

    # ── LoRA Loading ─────────────────────────────────────────────────────

    def _load_lora(self, pipe):
        """Load LoRA weights onto a pipeline.

        Non-fatal on failure: sets _lora_loaded accordingly so callers can
        report whether the fine-tuned weights are active.
        """
        logger.info(f"Loading LoRA weights from {LORA_REPO}...")
        try:
            pipe.load_lora_weights(
                LORA_REPO,
                weight_name=LORA_FILENAME,
            )
            self._lora_loaded = True
            logger.info("LoRA weights loaded successfully.")
        except Exception as e:
            logger.warning(f"Could not load LoRA weights: {e}")
            logger.warning("Running with base Flux model only.")
            self._lora_loaded = False

    # ── Pipeline Switching ───────────────────────────────────────────────

    def _unload_all(self):
        """Unload all pipelines to free VRAM before loading a new one."""
        logger.info(f"Unloading active pipeline (was: {self._active_mode})...")

        # Dropping the references lets gc + empty_cache reclaim the VRAM.
        self._text2img = None
        self._img2img = None
        self._inpaint = None
        self._ip_adapter_loaded = False
        self._active_mode = None

        flush_vram()

    @property
    def is_lora_loaded(self) -> bool:
        return self._lora_loaded

    @property
    def active_mode(self) -> Optional[str]:
        return self._active_mode
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
# ─────────────────────────────────────────────────────────────────────────────
# Singleton
# ─────────────────────────────────────────────────────────────────────────────
# Module-level cache; the manager is created on first request so that heavy
# model loading never happens at import time.
_manager: Optional[PipelineManager] = None


def get_pipeline_manager() -> PipelineManager:
    """Get or create the global pipeline manager singleton."""
    # NOTE(review): no locking around first creation — confirm callers only
    # hit this from a single thread during app startup.
    global _manager
    if _manager is None:
        _manager = PipelineManager()
    return _manager
|
deployment/prompt_engine.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
CampusGen AI – Prompt Engine
Uses Groq Llama 3.3 70B to transform simple event descriptions
into detailed, high-quality image generation prompts.
"""

import os
import logging
from typing import Optional

logger = logging.getLogger(__name__)

# ─────────────────────────────────────────────────────────────────────────────
# Configuration
# ─────────────────────────────────────────────────────────────────────────────
# Empty key disables the LLM entirely; _call_groq then returns None and
# callers fall back to template-built prompts.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
GROQ_MODEL = "llama-3.3-70b-versatile"
# LoRA trigger token; must appear at the start of every generation prompt
# (kept lowercase — _ensure_trigger compares case-insensitively against it).
TRIGGER_WORD = "campus_ai_poster"
|
| 20 |
+
|
| 21 |
+
# ─────────────────────────────────────────────────────────────────────────────
# System Prompts (per mode)
# ─────────────────────────────────────────────────────────────────────────────
# Each SYSTEM_* constant is the LLM system prompt for one generation mode.
# They are f-strings so TRIGGER_WORD is baked in at import time; the string
# bodies are runtime behavior and must not be reworded casually.

# text2img: full poster concept from scratch (richest spec, 200-word cap).
SYSTEM_TEXT2IMG = f"""You are a world-class poster design expert specializing in Indian college event posters. Given an event description, generate a detailed, cinematic image generation prompt.

Your prompt MUST include:
1. Composition & layout (center-stage focal point, text hierarchy areas, decorative borders)
2. Color palette (specific hex-inspired descriptions, gradients, mood)
3. Typography style (bold sans-serif, elegant serif, handwritten, neon glow)
4. Background elements (abstract patterns, venue imagery, thematic textures)
5. Lighting & atmosphere (dramatic spotlights, warm glow, neon reflections)
6. Cultural/thematic motifs appropriate to the event

RULES:
- ALWAYS start with "{TRIGGER_WORD}"
- Keep under 200 words
- Be extremely specific about visual details
- For Indian events, include culturally authentic motifs (rangoli, diyas, mehendi, etc.)
- Describe the poster as a finished design, not a scene
- Output ONLY the prompt, nothing else"""

# img2img: restyle an existing poster (describes the result, 150-word cap).
SYSTEM_IMG2IMG = f"""You are a poster restyling expert. Given a description of how the user wants to transform an existing poster, generate a detailed prompt describing the desired output.

Focus on:
1. The new visual style to apply
2. Color palette changes
3. Typography modifications
4. Atmosphere and mood shifts
5. Elements to preserve vs. change

RULES:
- ALWAYS start with "{TRIGGER_WORD}"
- Keep under 150 words
- Describe the desired RESULT, not the process
- Output ONLY the prompt"""

# inpaint: fill a masked region only (shortest spec, 100-word cap).
SYSTEM_INPAINT = f"""You are a poster editing expert. Given a description of what region the user wants to regenerate on a poster, generate a prompt describing what should fill that region.

Focus on:
1. What visual elements should appear in the masked area
2. Style consistency with the surrounding poster
3. Color and lighting continuity

RULES:
- ALWAYS start with "{TRIGGER_WORD}"
- Keep under 100 words
- Be specific about what fills the masked area
- Output ONLY the prompt"""
|
| 69 |
+
|
| 70 |
+
# ─────────────────────────────────────────────────────────────────────────────
# Style Descriptions
# ─────────────────────────────────────────────────────────────────────────────
# Maps each UI style name to a comma-separated prompt fragment appended to
# generation prompts. Keys must match the STYLES dropdown choices in the app.
STYLE_MAP = {
    "Vibrant and Energetic": (
        "vibrant energetic colors, electric gradients from magenta to cyan, "
        "dynamic diagonal composition, bold sans-serif typography, "
        "particle effects and light streaks"
    ),
    "Elegant and Professional": (
        "elegant professional design, deep navy and gold color scheme, "
        "clean serif typography, subtle gradient backgrounds, "
        "refined geometric accents"
    ),
    "Modern Minimalist": (
        "modern minimalist design, generous white space, "
        "monochromatic palette with single accent color, "
        "thin geometric lines, clean sans-serif typography"
    ),
    "Traditional Indian": (
        "traditional Indian design, warm gold saffron and deep red palette, "
        "ornate mandala borders, rangoli-inspired patterns, "
        "decorative Devanagari-style typography, paisley motifs"
    ),
    "Tech-Futuristic": (
        "futuristic cyberpunk tech design, dark background with neon glow, "
        "holographic elements, circuit board patterns, "
        "glitch text effects, electric blue and purple neon"
    ),
    "Artistic and Creative": (
        "artistic watercolor splash design, fluid organic shapes, "
        "hand-painted texture, eclectic mixed typography, "
        "ink splatter accents, warm earthy tones"
    ),
    "Neon Glow": (
        "neon glow poster design, deep black background, "
        "vivid neon tubes in pink cyan and yellow, "
        "reflective surfaces, urban night atmosphere, glow typography"
    ),
    "Retro Vintage": (
        "retro vintage poster design, distressed paper texture, "
        "muted warm color palette, bold block letters, "
        "halftone dot patterns, 70s inspired graphics"
    ),
    "Dark Premium": (
        "dark premium poster design, matte black with metallic gold accents, "
        "luxury typography, subtle emboss effects, "
        "dramatic lighting, high contrast minimal elements"
    ),
    "Gradient Modern": (
        "modern gradient poster, smooth multi-color gradient backgrounds, "
        "floating 3D geometric shapes, soft shadows, "
        "rounded sans-serif typography, glass morphism effects"
    ),
}
|
| 125 |
+
|
| 126 |
+
# Per-event-type motif fragments blended into prompts; "Other" is the
# fallback used by build_text2img_prompt when a type is not listed here.
EVENT_TYPE_HINTS = {
    "Technical Fest": "coding symbols, circuit patterns, robotic elements, binary code, tech logos",
    "Cultural Event": "stage lights, dance silhouettes, musical instruments, spotlights, curtains",
    "Sports Tournament": "dynamic action poses, sports equipment, stadium lights, motion blur, trophy",
    "Workshop / Seminar": "whiteboard, notebooks, professional setting, light bulb icons, knowledge symbols",
    "College Fest": "college campus backdrop, festive decorations, diverse crowd silhouettes, confetti",
    "Diwali Celebration": "diyas, rangoli, fireworks, marigold garlands, Lord Ganesha motifs, sparklers",
    "Holi Festival": "color powder splashes, water balloons, vibrant rainbow, pichkari, crowd celebration",
    "Navratri / Garba": "dandiya sticks, ghagra choli silhouettes, Durga motifs, festive lights",
    "Ganesh Chaturthi": "Lord Ganesha, modak, marigold, mandap, festive procession elements",
    "Eid Celebration": "crescent moon and star, mosque silhouette, lanterns, arabesque patterns",
    "Christmas / New Year": "Christmas tree, snowflakes, countdown clock, fireworks, candy canes",
    "Club Recruitment": "diverse student silhouettes, creative tools, speech bubbles, join-us energy",
    "Academic Event": "graduation cap, books, podium, academic shields, scholarly elements",
    "Freshers / Farewell": "welcome banner, photo frames, nostalgic elements, stage performance",
    "Blood Donation": "red cross, heart, blood drop, helping hands, medical symbols",
    "Music Concert": "guitar, microphone, soundwaves, stage spotlights, crowd silhouettes",
    "Food Festival": "food illustrations, chef hat, spice bowls, colorful plates, steam",
    "Marathon / Fitness": "running silhouettes, finish line, stopwatch, sneakers, energy",
    "Other": "professional event design, modern layout, eye-catching visual elements",
}
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def _call_groq(system_prompt: str, user_message: str) -> Optional[str]:
    """Send a single chat-completion request to Groq and return the reply text.

    Args:
        system_prompt: System-role instruction for the model.
        user_message: User-role message content.

    Returns:
        The stripped assistant reply, or ``None`` when no API key is
        configured or the request fails for any reason (network error,
        non-2xx status, unexpected payload shape). Callers treat ``None``
        as "fall back to a template prompt".
    """
    if not GROQ_API_KEY:
        return None

    try:
        # Imported lazily so the module loads even without requests installed.
        import requests

        headers = {
            "Authorization": f"Bearer {GROQ_API_KEY}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": GROQ_MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
            "temperature": 0.8,
            "max_tokens": 350,
            "top_p": 0.9,
        }
        resp = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=15,
        )
        resp.raise_for_status()
        body = resp.json()
        return body["choices"][0]["message"]["content"].strip()

    except Exception as e:
        # Best-effort: log and signal failure so the caller can use its fallback.
        logger.warning(f"Groq API error: {e}")
        return None
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def _ensure_trigger(prompt: str) -> str:
    """Return *prompt* guaranteed to begin with the LoRA trigger word.

    The check is case-insensitive on the prompt side; when the trigger is
    absent it is prepended with a single separating space.
    """
    already_present = prompt.lower().startswith(TRIGGER_WORD)
    return prompt if already_present else f"{TRIGGER_WORD} {prompt}"
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 192 |
+
# Public API
|
| 193 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 194 |
+
|
| 195 |
+
def build_text2img_prompt(
    event_description: str,
    event_type: str = "Other",
    style: str = "Vibrant and Energetic",
) -> str:
    """Build a rich prompt for text-to-poster generation.

    Resolves style and event-type hints from the module lookup tables,
    asks the Groq LLM to expand them into a full image prompt, and falls
    back to a deterministic template when the LLM is unavailable. The
    trigger word is always prepended to the result.
    """
    # Unknown keys fall back to the default style / generic event hints.
    style_desc = STYLE_MAP.get(style, STYLE_MAP["Vibrant and Energetic"])
    event_hints = EVENT_TYPE_HINTS.get(event_type, EVENT_TYPE_HINTS["Other"])

    user_msg = (
        f"Create an image generation prompt for this event poster:\n"
        f"Event: {event_description}\n"
        f"Type: {event_type}\n"
        f"Style: {style}\n"
        f"Style hints: {style_desc}\n"
        f"Thematic elements: {event_hints}\n"
    )

    llm_prompt = _call_groq(SYSTEM_TEXT2IMG, user_msg)
    if llm_prompt:
        return _ensure_trigger(llm_prompt)

    # Fallback without LLM
    fallback = (
        f"A professional {event_type.lower()} event poster for {event_description}. "
        f"{style_desc}. {event_hints}. "
        f"High quality typography, well-organized layout, eye-catching design."
    )
    return _ensure_trigger(fallback)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def build_img2img_prompt(
    transform_description: str,
    style: str = "Vibrant and Energetic",
) -> str:
    """Build a prompt for img2img poster transformation.

    Asks the Groq LLM to phrase the requested changes as an image prompt;
    falls back to a deterministic template when the LLM is unavailable.
    The trigger word is always prepended to the result.
    """
    # Unknown style names fall back to the default style description.
    style_desc = STYLE_MAP.get(style, STYLE_MAP["Vibrant and Energetic"])

    user_msg = (
        f"Transform this poster with the following changes:\n"
        f"Changes: {transform_description}\n"
        f"New style: {style}\n"
        f"Style hints: {style_desc}\n"
    )

    llm_prompt = _call_groq(SYSTEM_IMG2IMG, user_msg)
    if llm_prompt:
        return _ensure_trigger(llm_prompt)

    # Fallback without LLM
    fallback = (
        f"A transformed poster: {transform_description}. "
        f"{style_desc}. Professional quality, cohesive design."
    )
    return _ensure_trigger(fallback)
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def build_inpaint_prompt(
    fill_description: str,
) -> str:
    """Build a prompt for inpainting a region of a poster.

    Asks the Groq LLM to phrase the fill request as an image prompt and
    falls back to a simple template when the LLM is unavailable. The
    trigger word is always prepended to the result.
    """
    user_msg = f"Fill the masked region with: {fill_description}"

    llm_prompt = _call_groq(SYSTEM_INPAINT, user_msg)
    if llm_prompt:
        return _ensure_trigger(llm_prompt)

    # Fallback without LLM
    fallback = f"{fill_description}. Seamless blending, consistent style."
    return _ensure_trigger(fallback)
|
deployment/requirements.txt
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Deployment Requirements – HF Space
|
| 2 |
+
|
| 3 |
+
# ===== PyTorch (CUDA) =====
|
| 4 |
+
--extra-index-url https://download.pytorch.org/whl/cu130
|
| 5 |
+
torch
|
| 6 |
+
torchvision
|
| 7 |
+
|
| 8 |
+
# ===== Core Diffusion =====
|
| 9 |
+
diffusers>=0.30.0
|
| 10 |
+
transformers>=4.40.0
|
| 11 |
+
accelerate
|
| 12 |
+
safetensors
|
| 13 |
+
peft
|
| 14 |
+
|
| 15 |
+
# ===== IP-Adapter (reference image style) =====
|
| 16 |
+
ip-adapter
|
| 17 |
+
|
| 18 |
+
# ===== Upscaling =====
|
| 19 |
+
realesrgan
|
| 20 |
+
basicsr
|
| 21 |
+
gfpgan
|
| 22 |
+
|
| 23 |
+
# ===== Image Processing =====
|
| 24 |
+
Pillow
|
| 25 |
+
opencv-python-headless
|
| 26 |
+
numpy
|
| 27 |
+
|
| 28 |
+
# ===== Web UI =====
|
| 29 |
+
gradio>=4.40.0
|
| 30 |
+
spaces
|
| 31 |
+
|
| 32 |
+
# ===== LLM API =====
|
| 33 |
+
requests
|
docs/CAMPUS-AI-PROJECT-BRIEF.md
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CAMPUS-AI: PROJECT BRIEF
|
| 2 |
+
|
| 3 |
+
## Universal Event Poster Generator
|
| 4 |
+
|
| 5 |
+
**Project**: CampusGen AI
|
| 6 |
+
**Type**: AI-powered multi-modal event poster generation
|
| 7 |
+
**Hardware**: Intel Ultra 9 275HX + RTX 5070 Ti (16GB VRAM)
|
| 8 |
+
**Deployment**: Hugging Face Spaces (ZeroGPU — Free Tier)
|
| 9 |
+
**Total Cost**: $0
|
| 10 |
+
**Last Updated**: February 21, 2026
|
| 11 |
+
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
## EXECUTIVE SUMMARY
|
| 15 |
+
|
| 16 |
+
CampusGen AI generates professional event posters for ANY occasion in 10–15 seconds using:
|
| 17 |
+
|
| 18 |
+
- **Stable Diffusion XL 1.0 (2.6B params)** fine-tuned on **55,000+ diverse poster images** via LoRA
|
| 19 |
+
- **5 Generation Modes**: Text→Poster, Reference Image, Image Transform, Inpainting, HD Upscale
|
| 20 |
+
- **Llama 3.3 70B** (Groq) for intelligent prompt engineering
|
| 21 |
+
- **Real-ESRGAN** for 4x HD upscaling
|
| 22 |
+
- **IP-Adapter** for reference image style transfer
|
| 23 |
+
- **GPU-accelerated pipeline** end-to-end
|
| 24 |
+
|
| 25 |
+
---
|
| 26 |
+
|
| 27 |
+
## WHY THIS WINS
|
| 28 |
+
|
| 29 |
+
| Metric | CampusGen AI | Typical Projects |
|
| 30 |
+
|--------|-------------|------------------|
|
| 31 |
+
| Dataset | **55,000+ images, 55 categories** | 100-500 images, 1-2 categories |
|
| 32 |
+
| Generation Modes | **5 modes** (text, reference, transform, inpaint, upscale) | 1 mode (text only) |
|
| 33 |
+
| Training | LoRA on RTX 5070 Ti (bf16) | Quantized on Colab |
|
| 34 |
+
| Intelligence | **LLM-powered** prompt engineering (10 styles, 19 event types) | Template-based |
|
| 35 |
+
| Speed | 10-15 seconds/poster | 30-60+ seconds |
|
| 36 |
+
| Upscaling | **Real-ESRGAN 4x** HD output | None |
|
| 37 |
+
| Style Transfer | **IP-Adapter** reference image | None |
|
| 38 |
+
| Cost | $0 (smart free tier) | $0-200 |
|
| 39 |
+
| Deployment | Professional 5-tab HF Space | Local/unstable |
|
| 40 |
+
|
| 41 |
+
---
|
| 42 |
+
|
| 43 |
+
## TECHNOLOGY RATIONALE (Why These Models?)
|
| 44 |
+
|
| 45 |
+
| Technology | Why We Chose It | What It Replaces |
|
| 46 |
+
|------------|-----------------|------------------|
|
| 47 |
+
| **SDXL 1.0 (2.6B)** | The gold standard open-source framework for local training. It perfectly fits within a 12GB VRAM envelope allowing for rapid bf16 fine-tuning without destructive memory swapping. | Midjourney V6 / DALL-E 3 (closed source, un-finetunable) |
|
| 48 |
+
| **LoRA (Low-Rank Adaptation)** | Training a 2.6 Billion parameter model from scratch requires supercomputers. LoRA trains tiny adapter layers (**~80M parameters**) that sit on top of the frozen base model. This makes training possible in a few hours on a consumer RTX 5070 Ti (16GB) without catastrophic forgetting of the base model's knowledge. | Full Fine-Tuning (Requires multiple A100s, huge memory) |
|
| 49 |
+
| **Florence-2-large** | Microsoft's highly efficient Vision-Language Model. Instead of running 3 different models, Florence-2 does **Detailed Visual Summaries + OCR (reading text) + Dense Region Capturing** all in one pass. Clean, rich captions are the secret to teaching the SDXL model what a "poster" is. | BLIP-2 / LLaVA (bulkier, less strict OCR formatting) |
|
| 50 |
+
| **Llama 3.3 70B (via Groq)** | Users write lazy prompts like "a cybersec hackathon." We use Llama 3.3 to intercept that prompt and intelligently explode it into a highly detailed, cinematic description referencing our 10 trained visual styles and 19 event types. Running it through the Groq API makes this essentially instantaneous and free. | Hardcoded prompt templates (rigid, boring) |
|
| 51 |
+
| **IP-Adapter** | It allows users to upload a reference image (e.g., a cool poster they found online) and injects that structural/stylistic "vibe" into the generation pipeline natively, without needing a secondary text prompt. | ControlNet (heavier, overkill for pure style transfer) |
|
| 52 |
+
| **Real-ESRGAN** | A specialized upscaler neural network that reconstructs high-frequency details. Generating a 4K image directly in SDXL takes immense VRAM and time. It is faster to generate at 1024x1024 and run it through Real-ESRGAN to get a massive 4K HD output with perfectly crisp text in 2 seconds. | Bicubic interpolation (blurry, pixelated) |
|
| 53 |
+
|
| 54 |
+
--------------------------------------------------------------------------
|
| 55 |
+
|
| 56 |
+
## TRAINING SPECIFICATIONS
|
| 57 |
+
|
| 58 |
+
### Model Architecture
|
| 59 |
+
|
| 60 |
+
| Component | Specification |
|
| 61 |
+
|-----------|---------------|
|
| 62 |
+
| Base Model | Stable Diffusion XL 1.0 (2.6B parameters) — **FROZEN** |
|
| 63 |
+
| Fine-tuning | LoRA (Low-Rank Adaptation) |
|
| 64 |
+
| LoRA Rank | 32 |
|
| 65 |
+
| LoRA Alpha | 16 |
|
| 66 |
+
| LoRA Dropout | 0.05 |
|
| 67 |
+
| **Trainable Parameters** | **~80 million** (~3% of base model) |
|
| 68 |
+
| Precision | bf16 (bfloat16) |
|
| 69 |
+
| LoRA File Size | ~150-300 MB (.safetensors) |
|
| 70 |
+
| Trigger Word | `campus_ai_poster` |
|
| 71 |
+
|
| 72 |
+
### How LoRA Works
|
| 73 |
+
|
| 74 |
+
```text
|
| 75 |
+
Base model: SDXL 1.0 (2.6B params) → FROZEN, not modified
|
| 76 |
+
↓
|
| 77 |
+
LoRA injects small adapter matrices into attention layers:
|
| 78 |
+
Original W (4096×4096) = 16M params → FROZEN
|
| 79 |
+
LoRA: A (4096×32) + B (32×4096) = 262K params → TRAINED
|
| 80 |
+
↓
|
| 81 |
+
~250 attention layers × 262K = ~80M trainable params (3% of 2.6B)
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
### Training Configuration
|
| 85 |
+
|
| 86 |
+
| Parameter | Value |
|
| 87 |
+
|-----------|-------|
|
| 88 |
+
| Optimizer | AdamW 8-bit (`bitsandbytes`) |
|
| 89 |
+
| Learning Rate | 1e-4 (Phase 1) → 2e-5 (Phase 2) → **1e-5 (Phase 3)** |
|
| 90 |
+
| Batch Size | 1 |
|
| 91 |
+
| Gradient Accumulation | 4 steps |
|
| 92 |
+
| Effective Batch Size | 4 |
|
| 93 |
+
| Max Steps | 4000 (P1) + 20000 (P2) + **6448 (P3)** |
|
| 94 |
+
| Phase 3 Dataset | **6,448** highly curated typography & layout templates |
|
| 95 |
+
| Checkpoint Interval | Every 500 steps |
|
| 96 |
+
| Resolution | 1024×1024 |
|
| 97 |
+
| Noise Scheduler | DDPM |
|
| 98 |
+
| EMA Decay | 0.99 |
|
| 99 |
+
| Gradient Checkpointing | Enabled |
|
| 100 |
+
| Train UNet | Yes |
|
| 101 |
+
| Train Text Encoder | No |
|
| 102 |
+
| **Dependencies** | `bitsandbytes` (critical for 8-bit), `diffusers==0.32.1` (for `torchao` compat) |
|
| 103 |
+
| Estimated Time | ~7.5 hours on RTX 5070 Ti |
|
| 104 |
+
|
| 105 |
+
---
|
| 106 |
+
|
| 107 |
+
## DATASET SPECIFICATIONS
|
| 108 |
+
|
| 109 |
+
### Overview
|
| 110 |
+
|
| 111 |
+
| Metric | Value |
|
| 112 |
+
|--------|-------|
|
| 113 |
+
| Raw images scraped | ~1900 per theme × 55 themes = **~104,500** |
|
| 114 |
+
| After quality filter | ~1300 per theme = **~71,500** |
|
| 115 |
+
| Train split | 1000 per theme = **55,000** |
|
| 116 |
+
| Validation split | 200 per theme = **11,000** |
|
| 117 |
+
| Test split | 100 per theme = **5,500** |
|
| 118 |
+
|
| 119 |
+
### 55 Categories (Hierarchical)
|
| 120 |
+
|
| 121 |
+
| Group | Subcategories |
|
| 122 |
+
|-------|---------------|
|
| 123 |
+
| **Tech Fest** | Hackathon, AI/ML, Robotics, Coding Competition, Cybersecurity, Web Dev, Startup, Data Science, IoT, Open Source, Game Dev |
|
| 124 |
+
| **Cultural Fest** | Dance, Music, Drama, Art Exhibition, Poetry, Fashion Show, Photography |
|
| 125 |
+
| **College Events** | Annual Day, Freshers Party, Farewell, Alumni Meet, Orientation, Graduation |
|
| 126 |
+
| **Sports** | Cricket, Football, Basketball, Athletics, Chess, Badminton, Volleyball |
|
| 127 |
+
| **Festivals** | Diwali, Holi, Navratri/Garba, Ganesh Chaturthi, Eid, Christmas, Onam, Pongal |
|
| 128 |
+
| **Workshops** | Technical Seminar, Business Workshop, Creative Workshop, Leadership, Research |
|
| 129 |
+
| **Social** | Blood Donation, Charity, Environmental, Awareness Campaign, NSS/NCC |
|
| 130 |
+
| **Entertainment** | DJ Night, Concert, Standup Comedy, Movie Screening, Open Mic |
|
| 131 |
+
|
| 132 |
+
### Quality Filtering (GPU-Accelerated)
|
| 133 |
+
|
| 134 |
+
| Check | Threshold | Method |
|
| 135 |
+
|-------|-----------|--------|
|
| 136 |
+
| Resolution | ≥512px shortest side | CPU |
|
| 137 |
+
| Sharpness | Laplacian variance ≥50 | **GPU** (PyTorch conv2d) |
|
| 138 |
+
| Aspect Ratio | 0.4–2.5 | CPU |
|
| 139 |
+
| File Size | 20KB–50MB | CPU |
|
| 140 |
+
| Color Variance | std ≥15 | **GPU** (torch.std) |
|
| 141 |
+
| Deduplication | pHash distance ≤5 | CPU |
|
| 142 |
+
|
| 143 |
+
### Captioning
|
| 144 |
+
|
| 145 |
+
| Component | Detail |
|
| 146 |
+
|-----------|--------|
|
| 147 |
+
| Model | Florence-2-large (microsoft) |
|
| 148 |
+
| Device | **GPU** (float16) |
|
| 149 |
+
| Captions | `campus_ai_poster` trigger + category prefix + Florence-2 description |
|
| 150 |
+
| Output | Image + `.txt` pairs in `data/final/` |
|
| 151 |
+
|
| 152 |
+
---
|
| 153 |
+
|
| 154 |
+
## DEPLOYMENT APP — 5-Tab Architecture
|
| 155 |
+
|
| 156 |
+
### Files
|
| 157 |
+
|
| 158 |
+
| File | Purpose |
|
| 159 |
+
|------|---------|
|
| 160 |
+
| `app.py` | 5-tab Gradio UI (~500 lines) |
|
| 161 |
+
| `pipelines.py` | Pipeline manager — lazy loads SDXL/IP-Adapter/ESRGAN (~230 lines) |
|
| 162 |
+
| `prompt_engine.py` | Groq LLM with 10 styles, 19 event types (~250 lines) |
|
| 163 |
+
| `requirements.txt` | HF Space dependencies |
|
| 164 |
+
| `README.md` | HF Space card |
|
| 165 |
+
|
| 166 |
+
### 5 Generation Modes
|
| 167 |
+
|
| 168 |
+
| Tab | What It Does | Key Tech |
|
| 169 |
+
|-----|-------------|----------|
|
| 170 |
+
| ✍️ Text → Poster | Describe event → get poster(s) | SDXL + LoRA + Groq LLM |
|
| 171 |
+
| 🖼️ Reference Image | Upload a poster → copy its style | IP-Adapter |
|
| 172 |
+
| 🔄 Image Transform | Upload → restyle existing poster | Img2Img pipeline |
|
| 173 |
+
| 🖌️ Inpaint / Edit | Draw mask → regenerate region | Inpainting pipeline |
|
| 174 |
+
| 🔍 HD Upscale | 2x/4x upscale any image | Real-ESRGAN |
|
| 175 |
+
|
| 176 |
+
### Shared Features
|
| 177 |
+
|
| 178 |
+
- 7 resolution presets (768×1152, 1024×1024, etc.)
|
| 179 |
+
- 10 visual styles
|
| 180 |
+
- Batch generation (1-4 variants)
|
| 181 |
+
- Seed control
|
| 182 |
+
- LoRA strength slider
|
| 183 |
+
- Generation metadata display
|
| 184 |
+
|
| 185 |
+
### VRAM Management
|
| 186 |
+
|
| 187 |
+
- Only ONE pipeline active at a time (text2img OR img2img OR inpaint)
|
| 188 |
+
- Model CPU offloading for 16GB GPU / HF ZeroGPU
|
| 189 |
+
- IP-Adapter loads as lightweight adapter (~300MB) on top of base model
|
| 190 |
+
- Real-ESRGAN uses tiled processing (512px tiles) for memory efficiency
|
| 191 |
+
|
| 192 |
+
---
|
| 193 |
+
|
| 194 |
+
## GPU PIPELINE SUMMARY
|
| 195 |
+
|
| 196 |
+
| Step | Device | Time |
|
| 197 |
+
|------|--------|------|
|
| 198 |
+
| Scraping (Pinterest) | 🖥️ CPU (network-bound) | ~6-12h |
|
| 199 |
+
| Quality Filter | 🎮 GPU (Laplacian + color) | ~5 min |
|
| 200 |
+
| Captioning (Florence-2) | 🎮 GPU (float16) | ~6-12h |
|
| 201 |
+
| Dataset Split | 🖥️ CPU (file copy) | ~1 min |
|
| 202 |
+
| LoRA Training | 🎮 GPU (bf16) | ~7.5h |
|
| 203 |
+
| Upload to HF | 🖥️ CPU | ~5 min |
|
| 204 |
+
| Live Demo | ☁️ Cloud GPU (ZeroGPU) | Real-time |
|
| 205 |
+
|
| 206 |
+
---
|
| 207 |
+
|
| 208 |
+
## EXECUTION PIPELINE
|
| 209 |
+
|
| 210 |
+
```bash
|
| 211 |
+
# Phase 1: Data Collection
|
| 212 |
+
python scripts/pinterest_scraper.py # 🖥️ CPU — overnight
|
| 213 |
+
|
| 214 |
+
# Phase 2: Data Processing
|
| 215 |
+
python scripts/quality_filter.py # 🎮 GPU — ~5 min
|
| 216 |
+
python scripts/caption_generator.py # 🎮 GPU — overnight
|
| 217 |
+
python scripts/split_dataset.py # 🖥️ CPU — ~1 min
|
| 218 |
+
|
| 219 |
+
# Phase 3: Training (Dual-Phase)
|
| 220 |
+
python scripts/create_training_config.py # 🖥️ CPU — Setup
|
| 221 |
+
python ai-toolkit/run.py configs/train_sdxl_lora.yaml # 🎮 GPU — Phase 1 (3h)
|
| 222 |
+
python ai-toolkit/run.py configs/train_sdxl_lora_phase2.yaml # 🎮 GPU — Phase 2 (4.5h)
|
| 223 |
+
|
| 224 |
+
# Phase 4: Deploy
|
| 225 |
+
huggingface-cli upload YOUR_USERNAME/campus-ai-poster-sdxl models/sdxl/checkpoints/campus_ai_poster_sdxl/ .
|
| 226 |
+
# Push deployment/ to HF Space
|
| 227 |
+
```
|
| 228 |
+
|
| 229 |
+
---
|
| 230 |
+
|
| 231 |
+
## FILE STRUCTURE
|
| 232 |
+
|
| 233 |
+
```text
|
| 234 |
+
campus-ai/
|
| 235 |
+
├── .gitignore # Explicitly ignores data/ & models/ for GitHub push
|
| 236 |
+
├── configs/
|
| 237 |
+
│ ├── config.yaml # Master configuration (w/ hf_token)
|
| 238 |
+
│ ├── train_sdxl_lora.yaml # ai-toolkit Phase 1 generator
|
| 239 |
+
│ └── train_sdxl_lora_phase3.yaml # Phase 3 implicit layout tuner
|
| 240 |
+
├── scripts/
|
| 241 |
+
│ ├── pinterest_scraper.py # Image scraper (1900/theme)
|
| 242 |
+
│ ├── quality_filter.py # GPU-accelerated quality filter
|
| 243 |
+
│ ├── caption_generator.py # Florence-2 GPU captioning
|
| 244 |
+
│ ├── split_dataset.py # Fixed 1000/200/100 split
|
| 245 |
+
│ ├── test_checkpoint.py # LoRA inference testing
|
| 246 |
+
│ └── create_training_config.py # ai-toolkit config generator
|
| 247 |
+
├── deployment/
|
| 248 |
+
│ ├── app.py # 5-tab Gradio app
|
| 249 |
+
│ ├── pipelines.py # Pipeline manager
|
| 250 |
+
│ ├── prompt_engine.py # Groq LLM prompt engine
|
| 251 |
+
│ ├── requirements.txt # HF Space dependencies
|
| 252 |
+
│ └── README.md # HF Space card
|
| 253 |
+
├── data/
|
| 254 |
+
│ ├── raw/ # ~104K scraped images
|
| 255 |
+
│ ├── processed/ # ~71K quality-filtered
|
| 256 |
+
│ ├── final/ # Captioned pairs
|
| 257 |
+
│ ├── train/ # 55K (1000/theme)
|
| 258 |
+
│ ├── val/ # 11K (200/theme)
|
| 259 |
+
│ └── test/ # 5.5K (100/theme)
|
| 260 |
+
├── models/sdxl/checkpoints/ # Trained LoRA weights
|
| 261 |
+
├── docs/
|
| 262 |
+
│ ├── CAMPUS-AI-PROJECT-BRIEF.md # This file
|
| 263 |
+
│ ├── README.md # Project overview
|
| 264 |
+
│ ├── SETUP.md # Setup guide
|
| 265 |
+
│ └── PIPELINE.md # Execution pipeline
|
| 266 |
+
└── requirements.txt # Local dependencies
|
| 267 |
+
```
|
| 268 |
+
|
| 269 |
+
---
|
| 270 |
+
|
| 271 |
+
## COMPETITION STRATEGY
|
| 272 |
+
|
| 273 |
+
### What Judges Will See
|
| 274 |
+
|
| 275 |
+
1. **Live 5-tab demo** on Hugging Face (not just slides)
|
| 276 |
+
2. **55,000+ image dataset** (10-100x larger than competitors)
|
| 277 |
+
3. **5 generation modes** (competitors have 1)
|
| 278 |
+
4. **GPU-accelerated pipeline** (professional engineering)
|
| 279 |
+
5. **$0 deployment** (smart architecture)
|
| 280 |
+
|
| 281 |
+
### Key Talking Points
|
| 282 |
+
|
| 283 |
+
- "Trained on 55,000+ event posters across 55 categories — 10x larger than typical projects"
|
| 284 |
+
- "5 generation modes: text, reference image, transform, inpaint, upscale"
|
| 285 |
+
- "80 million trainable parameters via LoRA on 2.6 billion parameter SDXL model"
|
| 286 |
+
- "GPU-accelerated pipeline: quality filter, captioning, and training all on GPU"
|
| 287 |
+
- "Zero cost — entire project runs on free tier services"
|
| 288 |
+
|
| 289 |
+
### Tough Questions
|
| 290 |
+
|
| 291 |
+
**Q: "Only 80M params? That seems small."**
|
| 292 |
+
A: "That's the power of LoRA — we get the quality of a 2.6B model while only training 80M adapter parameters. The base model already knows how to generate images; our LoRA teaches it our specific poster style. Bigger ≠ better — efficiency is the innovation."
|
| 293 |
+
|
| 294 |
+
**Q: "How is this different from MidJourney?"**
|
| 295 |
+
A: "MidJourney is generic. Ours is specialized — trained on 55,000 Indian event posters. It understands rangoli patterns, tech fest aesthetics, and college event culture. Plus, 5 generation modes including reference image style transfer and inpainting."
|
| 296 |
+
|
| 297 |
+
**Q: "Can judges try it live?"**
|
| 298 |
+
A: "Absolutely — here's the HF Space link. Pick any event, any style. Generate in 15 seconds."
|
| 299 |
+
|
| 300 |
+
---
|
| 301 |
+
|
| 302 |
+
## SUCCESS METRICS
|
| 303 |
+
|
| 304 |
+
| Metric | Target | Status |
|
| 305 |
+
|--------|--------|--------|
|
| 306 |
+
| Dataset | 55K+ captioned images | ✅ Complete |
|
| 307 |
+
| Training | Loss <0.10, coherent samples | ⏳ Pending |
|
| 308 |
+
| Generation | <20 seconds, professional quality | ⏳ Pending |
|
| 309 |
+
| Deployment | Live 5-tab HF Space | ⏳ Pending |
|
| 310 |
+
| Demo | All 5 tabs working flawlessly | ⏳ Pending |
|
| 311 |
+
|
| 312 |
+
---
|
| 313 |
+
|
| 314 |
+
**Version**: 4.1
|
| 315 |
+
**Last Updated**: February 22, 2026
|
| 316 |
+
**Status**: Dataset captioned ✅ → Training LoRA on RTX 5070 Ti 🔄
|
docs/NOVELTY.md
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Campus-AI — Novelty & Unique Value Proposition
|
| 2 |
+
|
| 3 |
+
**by CounciL**
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## One-Liner
|
| 8 |
+
|
| 9 |
+
> *Campus-AI is the first domain-specific diffusion model fine-tuned on 71,000+ Indian campus event posters across 57 cultural subcategories, combining state-of-the-art LoRA+ training (ICML 2024) with an intelligent prompt engine to generate culturally-aware event posters accessible on consumer hardware.*
|
| 10 |
+
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
## 1. Novel Dataset (First of Its Kind)
|
| 14 |
+
|
| 15 |
+
No public dataset exists for Indian campus event posters. Campus-AI constructs one from scratch:
|
| 16 |
+
|
| 17 |
+
- **71,000+ curated base images** expanding dynamically to ~130,000+ total scraped from Pinterest via distributed Selenium workers.
|
| 18 |
+
- **57 distinct micro-subcategories** encompassing granular Indian culture (e.g., *Navratri Garba*, *Pongal*, *Hackathon UI*).
|
| 19 |
+
- **O(1) Global Perceptual Hash (PHash) Caching:**
|
| 20 |
+
- Standard scrapers download blind duplicates. We engineer an **SQLite-backed PHash cache** that computes a 64-bit fingerprint of every image.
|
| 21 |
+
- As scrapers run across 57 categories, they achieve **O(1) time complexity** deduplication lookups against a living 130k+ database.
|
| 22 |
+
- Zero cross-contamination: Guarantees absolute mathematical uniqueness of every new image entering the pipeline.
|
| 23 |
+
- **Strict Tuning Data Isolation (Phase 3 Strictness):**
|
| 24 |
+
- Fine-tuning requires flawless data. We built a recursive Selenium scraper that dynamically fetches deeper DOM loads until it achieves **exactly 100 mathematically unique images** per tuning subcategory. Any overlap with the base 130k database triggers an immediate rejection.
|
| 25 |
+
- **GPU-Accelerated Real-Time Quality Filtering:**
|
| 26 |
+
- Evaluates Laplacian variance (sharpness), color histograms, and native resolution. Drops blurry or irrelevant data before it even hits the disk.
|
| 27 |
+
- **Florence-2 VLM Multi-Modal Captioning:**
|
| 28 |
+
- Utilizes Microsoft's State-of-the-Art Vision-Language Model (`microsoft/Florence-2-large`) initialized in `bfloat16` to generate dense, composition-aware captions (e.g., detailing typography placement and lighting).
|
| 29 |
+
|
| 30 |
+
| Category | Subcategories | Examples |
|
| 31 |
+
|----------|:---:|---------|
|
| 32 |
+
| Festivals | 11 | Diwali, Holi, Durga Puja, Eid, Navratri, Onam, Pongal |
|
| 33 |
+
| Cultural Fest | 8 | Dance, Music, Drama, Fashion Show, Stand-up Comedy |
|
| 34 |
+
| Sports | 9 | Cricket, Kabaddi/Kho, Football, Esports, Yoga |
|
| 35 |
+
| Tech Fest | 7 | Hackathon, AI/ML, Cybersecurity, Robotics |
|
| 36 |
+
| Workshops | 7 | Placement, Coding, Design, Business, Seminar |
|
| 37 |
+
| College Events | 6 | Fresher's, Farewell, Annual Fest, Graduation |
|
| 38 |
+
| Social | 4 | Blood Donation, Awareness, Charity, Environment |
|
| 39 |
+
| Entertainment | 3 | Food Fest, Gaming, Movie Night |
|
| 40 |
+
| Styles | 2 | Minimalist, Neon Glow |
|
| 41 |
+
|
| 42 |
+
*This dataset alone is a publishable contribution to the research community.*
|
| 43 |
+
|
| 44 |
+
---
|
| 45 |
+
|
| 46 |
+
## 2. Novel Application Domain
|
| 47 |
+
|
| 48 |
+
No existing AI model — commercial or open-source — is specifically trained for Indian campus event posters. Generic models (Midjourney, DALL-E, Stable Diffusion) lack training data on:
|
| 49 |
+
|
| 50 |
+
- Indian festival visual language (rangoli, diyas, kolam, torans)
|
| 51 |
+
- Campus-specific poster conventions (event dates, venue formats, college branding)
|
| 52 |
+
- Regional cultural diversity (North vs. South vs. East Indian aesthetics)
|
| 53 |
+
|
| 54 |
+
Campus-AI is the **first domain-specific solution** for this underserved market of 40,000+ Indian colleges and universities.
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
## 3. End-to-End Pipeline Engineering
|
| 59 |
+
|
| 60 |
+
Most AI projects use pre-existing datasets. Campus-AI builds the **full ML pipeline from scratch**:
|
| 61 |
+
|
| 62 |
+
```
|
| 63 |
+
Pinterest Scraper → Quality Filter → Florence-2 Captioner → Dataset Splitter
|
| 64 |
+
→ LoRA Training (SDXL 1.0) → Gradio Deployment
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
Each stage is purpose-built:
|
| 68 |
+
|
| 69 |
+
| Stage | Technology | Key Innovation |
|
| 70 |
+
|-------|-----------|---------------|
|
| 71 |
+
| Scraping | Headless Selenium + SQLite PHash Caching | **Algorithmic Crawling:** Defeats anti-bot measures while executing O(1) mathematical deduplication against a 130k+ local SQLite cache to prevent data overlap. |
|
| 72 |
+
| Filtering | GPU-accelerated Laplacian | Real-time sharpness + color analysis |
|
| 73 |
+
| Captioning | Microsoft Florence-2-Large (bf16 + torch.compile) | **VLM Pipeline:** 300% faster batch inference via SM120 hardware optimizations; produces dense compositional data rather than standard tags. |
|
| 74 |
+
| Training | Custom ai-toolkit branch via LoRA+ | **Curriculum Learning:** 2-phase training isolating macro-layout in Phase 1, and micro-aesthetic refinement in Phase 2. |
|
| 75 |
+
| Deployment | Gradio + ZeroGPU | Free-tier cloud with local fallback |
|
| 76 |
+
|
| 77 |
+
---
|
| 78 |
+
|
| 79 |
+
## 4. State-of-the-Art Training Algorithm Stack
|
| 80 |
+
|
| 81 |
+
Campus-AI combines **six cutting-edge techniques**, each from recent research, into one optimized training pipeline:
|
| 82 |
+
|
| 83 |
+
The table below summarizes each technique and its contribution:
|
| 84 |
+
|
| 85 |
+
| Technique | Source | Year | What It Does |
|
| 86 |
+
|-----------|--------|:---:|-------------|
|
| 87 |
+
| **Dual-Phase Curriculum** | Fine-to-Coarse ML theory | 2024 practice | Phase 1 (1e-4) learns macro layout; Phase 2 (2e-5) refines micro details without catastrophic forgetting |
|
| 88 |
+
| **LoRA+** | ICML paper | 2024 | 16× higher LR for B matrix → +2% accuracy, 2× faster convergence, zero extra cost |
|
| 89 |
+
| **Prodigy Optimizer** | Community best practice | 2024 | Self-adapting learning rate — eliminates manual LR tuning across 57 diverse categories |
|
| 90 |
+
| **Min-SNR-γ Loss** | "Efficient Diffusion Training" | 2023 | Balanced learning across all noise levels → prevents memorization, improves generalization |
|
| 91 |
+
| **Cosine Scheduler** | Standard Practice | 2024 practice | Smooth LR decay with no restarts for stable high-frequency detail learning in Phase 2 |
|
| 92 |
+
| **SM120 Blackwell Optimizations** | Hardware-specific | 2025 | TF32 tensor cores, torch.compile max-autotune, bf16 native precision |
|
| 93 |
+
|
| 94 |
+
No existing LoRA trainer combines all six. The synergy between self-adapting LR (Prodigy), balanced loss (Min-SNR-γ), and the dual-phase cosine LR schedule is a **novel training configuration**.
|
| 95 |
+
|
| 96 |
+
---
|
| 97 |
+
|
| 98 |
+
## 5. Intelligent Prompt Engineering
|
| 99 |
+
|
| 100 |
+
Campus-AI uses **Groq Llama 3.3 70B** (~1,200-1,500 tokens/sec) to transform simple user input into detailed, SDXL-optimized prompts:
|
| 101 |
+
|
| 102 |
+
```
|
| 103 |
+
User: "tech fest poster for IIT"
|
| 104 |
+
Llama 3.3: "A vibrant, high-energy technology festival poster for an IIT campus,
|
| 105 |
+
featuring circuit board patterns, holographic UI elements, neon blue
|
| 106 |
+
and electric purple gradients, bold modern typography reading 'TECH FEST
|
| 107 |
+
2026', robotic arms and AI neural network visualizations, dark background
|
| 108 |
+
with glowing particle effects, professional event poster layout"
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
This eliminates the **prompt engineering barrier** — users don't need to learn SDXL's prompt syntax.
|
| 112 |
+
|
| 113 |
+
---
|
| 114 |
+
|
| 115 |
+
## 6. Multi-Modal Generation (4-in-1)
|
| 116 |
+
|
| 117 |
+
Most poster AIs offer only text-to-image. Campus-AI offers five generation modes:
|
| 118 |
+
|
| 119 |
+
| Mode | Technology | Use Case |
|
| 120 |
+
|------|-----------|----------|
|
| 121 |
+
| **Text → Poster** | StableDiffusionXLPipeline | Generate from description alone |
|
| 122 |
+
| **Reference Image** | IP-Adapter | Copy style from uploaded poster |
|
| 123 |
+
| **Image → Image** | StableDiffusionXLImg2ImgPipeline | Transform/restyle existing designs |
|
| 124 |
+
| **Inpainting** | StableDiffusionXLInpaintPipeline | Edit specific regions of a poster |
|
| 125 |
+
| **Dynamic Typography** | Smart Zone Detection + PIL | 100% native integration of text without black boxes or clipping |
|
| 126 |
+
|
| 127 |
+
Plus **Real-ESRGAN 2× upscaling** for HD output.
|
| 128 |
+
|
| 129 |
+
---
|
| 130 |
+
|
| 131 |
+
## 7. Accessible by Design
|
| 132 |
+
|
| 133 |
+
| Metric | Campus-AI | Midjourney | DALL-E 3 | Canva AI |
|
| 134 |
+
|--------|-----------|------------|----------|----------|
|
| 135 |
+
| **Cost** | Free | $10-60/mo | $20/mo | $13/mo |
|
| 136 |
+
| **GPU required** | 12GB consumer | Cloud (their servers) | Cloud | N/A |
|
| 137 |
+
| **Privacy** | Your data stays local | Uploaded to their servers | Uploaded | Uploaded |
|
| 138 |
+
| **Open source** | ✅ Full pipeline | ❌ Proprietary | ❌ Proprietary | ❌ Proprietary |
|
| 139 |
+
| **Customizable** | ✅ Retrain on your data | ❌ | ❌ | ❌ |
|
| 140 |
+
|
| 141 |
+
---
|
| 142 |
+
|
| 143 |
+
## 8. Performance Metrics
|
| 144 |
+
|
| 145 |
+
### Prompt Engine (Groq Llama 3.3 70B)
|
| 146 |
+
|
| 147 |
+
| Metric | Value |
|
| 148 |
+
|--------|-------|
|
| 149 |
+
| Inference speed | ~1,200-1,500 tokens/sec |
|
| 150 |
+
| Output per prompt | ~150-200 tokens |
|
| 151 |
+
| End-to-end latency | ~150-200ms |
|
| 152 |
+
|
| 153 |
+
### Image Generation (SDXL 1.0 + LoRA)
|
| 154 |
+
|
| 155 |
+
| Metric | Local (12GB VRAM) | Cloud (A100) |
|
| 156 |
+
|--------|-------------------|-------------|
|
| 157 |
+
| Steps/sec | ~0.5-1.0 it/s | ~3-5 it/s |
|
| 158 |
+
| Time per image (28 steps) | ~30-60 sec | ~6-10 sec |
|
| 159 |
+
| Resolution | Up to 1152×768 | Up to 1152×768 |
|
| 160 |
+
|
| 161 |
+
### Data Pipeline
|
| 162 |
+
|
| 163 |
+
| Stage | Speed |
|
| 164 |
+
|-------|-------|
|
| 165 |
+
| Quality filtering | ~50-100 images/sec (GPU) |
|
| 166 |
+
| Florence-2 captioning | ~3-5 images/sec (bf16 + torch.compile) |
|
| 167 |
+
| Real-ESRGAN upscaling | ~5 sec per image |
|
| 168 |
+
|
| 169 |
+
---
|
| 170 |
+
|
| 171 |
+
## 9. Planned Post-Training Evaluation (Quantitative Novelty)
|
| 172 |
+
|
| 173 |
+
### 9a. FID & CLIP Score Comparison
|
| 174 |
+
|
| 175 |
+
| Comparison | What It Proves |
|
| 176 |
+
|-----------|---------------|
|
| 177 |
+
| Base SDXL vs. Campus-AI on Indian prompts | Fine-tuning significantly improves domain-specific quality |
|
| 178 |
+
| Campus-AI vs. generic SDXL on Indian prompts | LoRA fine-tuning outperforms base model on domain tasks |
|
| 179 |
+
|
| 180 |
+
> Lower FID = more realistic images. Higher CLIP score = better prompt adherence.
|
| 181 |
+
|
| 182 |
+
### 9b. User Study (Blind Evaluation)
|
| 183 |
+
|
| 184 |
+
Planned study with 20-30 students rating posters blindly:
|
| 185 |
+
|
| 186 |
+
| Source | Criteria |
|
| 187 |
+
|--------|----------|
|
| 188 |
+
| Campus-AI | Cultural relevance, visual quality, poster layout |
|
| 189 |
+
| Midjourney | Same prompts, same criteria |
|
| 190 |
+
| Canva templates | Same event type |
|
| 191 |
+
|
| 192 |
+
> If Campus-AI wins on "cultural relevance" — that's publishable hard evidence.
|
| 193 |
+
|
| 194 |
+
### 9c. Ablation Study
|
| 195 |
+
|
| 196 |
+
Remove each technique individually to prove contribution:
|
| 197 |
+
|
| 198 |
+
| Experiment | Expected Result |
|
| 199 |
+
|-----------|----------------|
|
| 200 |
+
| Without Min-SNR-γ | Worse on high-noise timesteps, inconsistent quality |
|
| 201 |
+
| Without caption dropout | Overfitting — struggles with novel prompts |
|
| 202 |
+
| Without LoRA+ | Slower convergence (~2× more steps needed) |
|
| 203 |
+
| Without cosine LR schedule | Stuck in local minima — less diversity |
|
| 204 |
+
| Without Prodigy | Wrong LR hurts some categories |
|
| 205 |
+
|
| 206 |
+
> This proves each component is necessary, not arbitrary.
|
| 207 |
+
|
| 208 |
+
---
|
| 209 |
+
|
| 210 |
+
## Technical Differentiation Summary
|
| 211 |
+
|
| 212 |
+
| Aspect | Generic AI | Campus-AI |
|
| 213 |
+
|--------|-----------|-----------|
|
| 214 |
+
| Indian cultural awareness | ❌ Western-biased | ✅ 57 Indian subcategories |
|
| 215 |
+
| Campus event context | ❌ No training data | ✅ 71K+ curated posters |
|
| 216 |
+
| Prompt intelligence | ❌ Manual prompt craft | ✅ Llama 3.3 auto-enhances |
|
| 217 |
+
| Generation modes | Text-to-image only | 4 modes + upscaling |
|
| 218 |
+
| Cost | $10-60/month | Free |
|
| 219 |
+
| Data pipeline | Pre-existing datasets | Custom scrape-to-deploy |
|
| 220 |
+
| Training techniques | Unknown/proprietary | SOTA open research (LoRA+, Min-SNR-γ) |
|
| 221 |
+
| Reproducibility | ❌ Closed source | ✅ Fully reproducible |
|
| 222 |
+
|
| 223 |
+
---
|
| 224 |
+
|
| 225 |
+
*Campus-AI by Council Strategic Solutions — Built for the Indian campus community*
|
docs/PIPELINE.md
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CampusGen AI — Full Execution Pipeline
|
| 2 |
+
|
| 3 |
+
> Step-by-step guide from raw data to live hackathon demo.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## Phase 1: Data Collection (Raw Ingestion) 🖥️ CPU
|
| 8 |
+
|
| 9 |
+
⏱️ **Runtime Strategy:** ~6-12 hours across distributed local instances (Run Async)
|
| 10 |
+
⚙️ **Hardware Requirement:** standard CPU, high bandwidth connection, 500GB+ NVMe SSD recommended.
|
| 11 |
+
|
| 12 |
+
```bash
|
| 13 |
+
cd e:\campus-ai
|
| 14 |
+
python scripts/pinterest_scraper.py
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
- Downloads **1900 images per theme** across 55 categories
|
| 18 |
+
- Saves to `data/raw/` with hierarchical folders (`tech_fest/hackathon/`, etc.)
|
| 19 |
+
- **Global Deduplication:** Uses a custom `GlobalImageDeduplicator` employing Perceptual Hashing (PHash) and a high-performance SQLite caching layer (`data/phash_cache.db`). Scans ~130,000+ existing images instantly to ensure zero duplicates across the entire corpus.
|
| 20 |
+
- Skips already-downloaded images safely — safe to restart
|
| 21 |
+
|
| 22 |
+
---
|
| 23 |
+
|
| 24 |
+
## Phase 1.5: Tuning Dataset Collection 🕸️ CPU
|
| 25 |
+
|
| 26 |
+
⏱️ ~1-2 hours (Targeted run)
|
| 27 |
+
|
| 28 |
+
```bash
|
| 29 |
+
cd e:\campus-ai
|
| 30 |
+
python scripts/pinterest_tuning_scraper.py
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
- **Strict Enforcement Engine:** Uses a heavily modified Selenium scraper that recursively scrolls and cycles through search queries until it achieves strictly **100 unique images** per 55 specific subcategories.
|
| 34 |
+
- **Data Isolation:** Saves uniquely to `data/tuning/<category>/<subcategory>/`.
|
| 35 |
+
- **Absolute Uniqueness:** Pipes newly scraped images through the identical `GlobalImageDeduplicator` cache, guaranteeing these 5,500 tuning images have absolutely zero overlap with the 100k+ images in the main `data/raw`, `data/train`, or `data/val` datasets.
|
| 36 |
+
|
| 37 |
+
---
|
| 38 |
+
|
| 39 |
+
## Phase 2: Data Processing & Quality Assurance
|
| 40 |
+
|
| 41 |
+
### 2a. Quality Filter 🎮 GPU (~5 min)
|
| 42 |
+
|
| 43 |
+
⚙️ **Algorithm:** Offloads Canny Edge / Laplacian Variance calculations to CUDA to rapidly sweep 130k+ images for optimal sharpness and color contrast.
|
| 44 |
+
|
| 45 |
+
```bash
|
| 46 |
+
python scripts/quality_filter.py
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
Removes blurry, low-res, duplicate images → saves to `data/processed/`
|
| 50 |
+
|
| 51 |
+
### 2b. Caption Generation 🎮 GPU (~6-12 hours)
|
| 52 |
+
|
| 53 |
+
⚙️ **Model Architecture:** Microsoft `Florence-2-large` via HuggingFace `transformers`.
|
| 54 |
+
⚙️ **Hardware Target:** RTX 4070 Ti / 5070 Ti (Float16 precision, ~12GB VRAM allocation).
|
| 55 |
+
|
| 56 |
+
```bash
|
| 57 |
+
python scripts/caption_generator.py
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
- Transforms pixel data into rich spatial text (e.g., "Bold sans-serif typography on the top left, neon cyber-punk background, dates on bottom right"). Saves `.txt` pairs to `data/final/`. These pairs are critical for SDXL cross-attention during LoRA tuning.
|
| 61 |
+
|
| 62 |
+
### 2c. Dataset Split 🖥️ CPU (~1 min)
|
| 63 |
+
|
| 64 |
+
⚙️ **Logic:** Deterministic pseudo-random seed to guarantee identical splits across team machines.
|
| 65 |
+
|
| 66 |
+
```bash
|
| 67 |
+
python scripts/split_dataset.py
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
Splits into **1000 train / 200 val / 100 test** per theme → `data/train/`, `data/val/`, `data/test/`
|
| 71 |
+
|
| 72 |
+
---
|
| 73 |
+
|
| 74 |
+
## Phase 3: Fine-Tune LoRA 🎮 GPU (~7-8 hours total)
|
| 75 |
+
|
| 76 |
+
**Core Training Engine:** `ai-toolkit` featuring LoRA+ optimization. Employs a dual-phase curriculum to circumvent catastrophic forgetting while molding the SDXL 1.0 architecture.
|
| 77 |
+
|
| 78 |
+
### 3a. Phase 1: Layout Pass (~3 hours)
|
| 79 |
+
|
| 80 |
+
- **Objective:** Teaches the model the macro-composition, layout, and lighting of the 55 event categories.
|
| 81 |
+
- **Data Source:** Exclusively uses `data/train/` (to preserve validation sets for Phase 2).
|
| 82 |
+
|
| 83 |
+
```bash
|
| 84 |
+
# 1. Generate optimal JSON layout training config
|
| 85 |
+
python scripts/create_training_config.py
|
| 86 |
+
|
| 87 |
+
# 2. Train Layout Pass (Learning Rate: 1e-4)
|
| 88 |
+
python ai-toolkit/run.py configs/train_sdxl_lora.yaml
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
Output: `models/sdxl/checkpoints/campus_ai_poster_sdxl/campus_ai_poster_sdxl.safetensors`
|
| 92 |
+
|
| 93 |
+
### 3b. Phase 2: Perfection Pass (~4.5 hours)
|
| 94 |
+
|
| 95 |
+
- **Objective:** Bakes in micro-details, sharp Indian cultural textures (e.g., diwali lamps, specific fonts), and perfect aesthetic adherence.
|
| 96 |
+
- **Mechanics:** Resumes gracefully from the Phase 1 `.safetensors` weights. Drops learning rate sequentially (2e-5) while utilizing the full 100% data blend (`train`, `val`, `test`).
|
| 97 |
+
|
| 98 |
+
```bash
|
| 99 |
+
# Train Perfection Pass (Internal Checkpoint Resume)
|
| 100 |
+
python ai-toolkit/run.py configs/train_sdxl_lora_phase2.yaml
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
Output: Overwrites the `.safetensors` with the high-fidelity weights.
|
| 104 |
+
|
| 105 |
+
---
|
| 106 |
+
|
| 107 |
+
## Phase 4: Upload to Hugging Face 🖥️ CPU
|
| 108 |
+
|
| 109 |
+
### 4a. Install & Login
|
| 110 |
+
|
| 111 |
+
```bash
|
| 112 |
+
pip install "huggingface-hub[cli]"
|
| 113 |
+
huggingface-cli login
|
| 114 |
+
# Paste your token from https://huggingface.co/settings/tokens
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
### 4b. Upload LoRA Weights
|
| 118 |
+
|
| 119 |
+
```bash
|
| 120 |
+
huggingface-cli upload YOUR_USERNAME/campus-ai-poster-sdxl models/sdxl/checkpoints/campus_ai_poster_sdxl/ .
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
### 4c. Create & Deploy HF Space
|
| 124 |
+
|
| 125 |
+
```bash
|
| 126 |
+
cd deployment
|
| 127 |
+
git init
|
| 128 |
+
huggingface-cli repo create campus-ai-poster-generator --type space --space-sdk gradio
|
| 129 |
+
git remote add space https://huggingface.co/spaces/YOUR_USERNAME/campus-ai-poster-generator
|
| 130 |
+
git add app.py pipelines.py prompt_engine.py requirements.txt README.md
|
| 131 |
+
git commit -m "Deploy CampusGen AI"
|
| 132 |
+
git push space main
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
### 4d. Add Secrets (on HF website)
|
| 136 |
+
|
| 137 |
+
Go to **Space Settings → Variables and Secrets** and add:
|
| 138 |
+
|
| 139 |
+
| Secret Name | Value |
|
| 140 |
+
|---------------|----------------------|
|
| 141 |
+
| `HF_USERNAME` | your HF username |
|
| 142 |
+
| `GROQ_API_KEY` | your Groq API key |
|
| 143 |
+
|
| 144 |
+
---
|
| 145 |
+
|
| 146 |
+
## Phase 5: Test Live ☁️ Cloud GPU
|
| 147 |
+
|
| 148 |
+
Open `https://huggingface.co/spaces/YOUR_USERNAME/campus-ai-poster-generator` and test all 5 tabs.
|
| 149 |
+
|
| 150 |
+
---
|
| 151 |
+
|
| 152 |
+
## HF Free vs Pro
|
| 153 |
+
|
| 154 |
+
| Feature | Free | Pro ($9/mo) |
|
| 155 |
+
|---------|------|-------------|
|
| 156 |
+
| ZeroGPU (shared A100) | ✅ Low priority | ✅ High priority |
|
| 157 |
+
| Private Spaces | ❌ | ✅ |
|
| 158 |
+
| Persistent Storage | ❌ | ✅ |
|
| 159 |
+
| Cold start | Slower | Faster |
|
| 160 |
+
|
| 161 |
+
**Verdict: Free tier works for a hackathon demo.** Upgrade to Pro only if the queue is too slow during judging.
|
| 162 |
+
|
| 163 |
+
---
|
| 164 |
+
|
| 165 |
+
## Quick Reference
|
| 166 |
+
|
| 167 |
+
```
|
| 168 |
+
pinterest_scraper.py → data/raw/ (1900 images/theme)
|
| 169 |
+
pinterest_tuning_scraper.py → data/tuning/ (Strictly 100 entirely unique images/theme)
|
| 170 |
+
image_deduplicator.py → data/phash_cache.db (O(1) lookups via SQLite PHash)
|
| 171 |
+
quality_filter.py → data/processed/ (~1300 quality-passed/theme)
|
| 172 |
+
caption_generator.py → data/final/ (image + caption pairs)
|
| 173 |
+
split_dataset.py → data/train/val/test/ (1000/200/100)
|
| 174 |
+
create_training_config.py → configs/train_sdxl_lora.yaml
|
| 175 |
+
ai-toolkit/run.py → configs/train_sdxl_lora.yaml (Phase 1 Layout)
|
| 176 |
+
ai-toolkit/run.py → configs/train_sdxl_lora_phase2.yaml (Phase 2 Detail)
|
| 177 |
+
test_checkpoint.py → poster_compositor.py (SDXL Art + PIL Typography)
|
| 178 |
+
deployment/app.py → HF Space (live demo for judges)
|
| 179 |
+
```
|
docs/README.md
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CampusGen AI – Universal Event Poster Generator
|
| 2 |
+
|
| 3 |
+
> AI-powered event poster generation for any occasion in 10–15 seconds.
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
CampusGen AI generates professional event posters using:
|
| 8 |
+
|
| 9 |
+
- **Stable Diffusion XL 1.0** fine-tuned on 55,000+ diverse poster images via LoRA
|
| 10 |
+
- **Llama 3.3 70B** (Groq) for natural language event understanding
|
| 11 |
+
- **5 Generation Modes**: Text→Poster, Reference Image, Img2Img, Inpainting, HD Upscale
|
| 12 |
+
- **GPU-accelerated pipeline** from data processing to training
|
| 13 |
+
- **Zero cost** deployment on Hugging Face Spaces (ZeroGPU)
|
| 14 |
+
|
| 15 |
+
## Architecture
|
| 16 |
+
|
| 17 |
+
```text
|
| 18 |
+
User Input → Groq LLM (prompt engineering) → SDXL 1.0 + LoRA → HD Upscale → Poster
|
| 19 |
+
↑
|
| 20 |
+
IP-Adapter (reference style)
|
| 21 |
+
Img2Img (transform)
|
| 22 |
+
Inpainting (edit regions)
|
| 23 |
+
```
|
| 24 |
+
|
| 25 |
+
| Component | Details |
|
| 26 |
+
|-----------|---------|
|
| 27 |
+
| Base Model | Stable Diffusion XL 1.0 (2.6B params) |
|
| 28 |
+
| Fine-tuning | Dual-Phase LoRA rank 32, bf16, 55K+ images |
|
| 29 |
+
| Curriculum | Phase 1 (Layout/1e-4) → Phase 2 (Perfection/2e-5) |
|
| 30 |
+
| Dataset | 55,000+ curated event posters, 55 categories |
|
| 31 |
+
| LLM | Llama 3.3 70B via Groq (free tier) |
|
| 32 |
+
| Upscaler | Real-ESRGAN 4x |
|
| 33 |
+
| Deployment | HF Spaces with ZeroGPU |
|
| 34 |
+
|
| 35 |
+
## Categories (55 themes)
|
| 36 |
+
|
| 37 |
+
| Group | Subcategories |
|
| 38 |
+
|-------|--------------|
|
| 39 |
+
| Tech Fest | Hackathons, AI/ML, robotics, coding competitions, cyber security |
|
| 40 |
+
| Cultural Event | Dance, music, drama, art exhibitions, poetry |
|
| 41 |
+
| College Events | Annual days, freshers, farewell, alumni meets |
|
| 42 |
+
| Sports | Cricket, football, basketball, athletics, chess |
|
| 43 |
+
| Festivals | Diwali, Holi, Navratri, Ganesh Chaturthi, Eid, Christmas |
|
| 44 |
+
| Workshops | Seminars, webinars, training sessions, conferences |
|
| 45 |
+
| Social | Blood donation, charity, environmental drives |
|
| 46 |
+
| Entertainment | DJ nights, concerts, standup comedy, movie screenings |
|
| 47 |
+
|
| 48 |
+
## Project Structure
|
| 49 |
+
|
| 50 |
+
```text
|
| 51 |
+
campus-ai/
|
| 52 |
+
├── configs/
|
| 53 |
+
│ └── config.yaml # Master configuration
|
| 54 |
+
├── scripts/
|
| 55 |
+
│ ├── pinterest_scraper.py # Image scraper (CPU, network-bound)
|
| 56 |
+
│ ├── quality_filter.py # GPU-accelerated quality filtering
|
| 57 |
+
│ ├── caption_generator.py # Florence-2 captioning (GPU)
|
| 58 |
+
│ ├── split_dataset.py # Dataset splitting (1000/200/100)
|
| 59 |
+
│ ├── test_checkpoint.py # LoRA inference testing
|
| 60 |
+
│ └── create_training_config.py # ai-toolkit config generator
|
| 61 |
+
├── deployment/
|
| 62 |
+
│ ├── app.py # 5-tab Gradio application
|
| 63 |
+
│ ├── pipelines.py # Pipeline manager (SDXL/IP-Adapter/ESRGAN)
|
| 64 |
+
│ ├── prompt_engine.py # Groq LLM prompt engineering
|
| 65 |
+
│ ├── requirements.txt # HF Space dependencies
|
| 66 |
+
│ └── README.md # HF Space card
|
| 67 |
+
├── data/
|
| 68 |
+
│ ├── raw/ # Scraped images (~1900/theme)
|
| 69 |
+
│ ├── processed/ # GPU-filtered images (~1300/theme)
|
| 70 |
+
│ ├── final/ # Captioned dataset (GPU)
|
| 71 |
+
│ ├── train/ # 1000 images/theme
|
| 72 |
+
│ ├── val/ # 200 images/theme
|
| 73 |
+
│ └── test/ # 100 images/theme
|
| 74 |
+
├── models/ # Trained LoRA checkpoints
|
| 75 |
+
├── outputs/ # Generated outputs
|
| 76 |
+
├── docs/
|
| 77 |
+
│ ├── README.md # This file
|
| 78 |
+
│ ├── SETUP.md # Setup guide
|
| 79 |
+
│ └── PIPELINE.md # Execution pipeline
|
| 80 |
+
└── requirements.txt # Local dependencies
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
## Quick Start
|
| 84 |
+
|
| 85 |
+
```bash
|
| 86 |
+
# 1. Setup
|
| 87 |
+
conda create -n campus-ai python=3.11
|
| 88 |
+
conda activate campus-ai
|
| 89 |
+
pip install -r requirements.txt
|
| 90 |
+
|
| 91 |
+
# 2. Data Pipeline
|
| 92 |
+
python scripts/pinterest_scraper.py # 🖥️ CPU — Scrape posters (overnight)
|
| 93 |
+
python scripts/quality_filter.py # 🎮 GPU — Filter quality (~5 min)
|
| 94 |
+
python scripts/caption_generator.py # 🎮 GPU — Generate captions (overnight)
|
| 95 |
+
python scripts/split_dataset.py # 🖥️ CPU — Split 1000/200/100
|
| 96 |
+
|
| 97 |
+
# 3. Training
|
| 98 |
+
python scripts/create_training_config.py # 🖥️ CPU — Generate ai-toolkit config
|
| 99 |
+
cd ai-toolkit && python run.py ../configs/train_sdxl_lora.yaml # 🎮 GPU — Phase 1 (Layout)
|
| 100 |
+
cd ai-toolkit && python run.py ../configs/train_sdxl_lora_phase2.yaml # 🎮 GPU — Phase 2 (Perfection)
|
| 101 |
+
|
| 102 |
+
# 4. Deploy
|
| 103 |
+
huggingface-cli upload YOUR_USERNAME/campus-ai-poster-sdxl models/sdxl/lora/ . # Upload LoRA
|
| 104 |
+
# Push deployment/ files to HF Space
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
See [SETUP.md](SETUP.md) for detailed instructions. See [PIPELINE.md](PIPELINE.md) for step-by-step execution guide.
|
| 108 |
+
|
| 109 |
+
## Hardware
|
| 110 |
+
|
| 111 |
+
- **GPU**: NVIDIA RTX 5070 Ti (12GB VRAM) — used for quality filtering, captioning, training
|
| 112 |
+
- **CPU**: Intel Ultra 9 275HX (24 cores) — used for scraping, splitting
|
| 113 |
+
- **RAM**: 32GB
|
| 114 |
+
- **Training time**: ~7.5 hours (Phase 1 Layout + Phase 2 Perfection)
|
| 115 |
+
|
| 116 |
+
## Author
|
| 117 |
+
|
| 118 |
+
**M Runeet Kumar** – Ashta/Indore, MP, India
|
| 119 |
+
|
| 120 |
+
## License
|
| 121 |
+
|
| 122 |
+
MIT
|
docs/SETUP.md
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CampusGen AI – Setup Guide
|
| 2 |
+
|
| 3 |
+
## Prerequisites
|
| 4 |
+
|
| 5 |
+
- **OS**: Windows 10/11 or Ubuntu 22.04+
|
| 6 |
+
- **Python**: 3.11+
|
| 7 |
+
- **GPU**: NVIDIA GPU with 12GB+ VRAM (RTX 5070 Ti used for development)
|
| 8 |
+
- **CUDA**: 12.1+ with matching drivers
|
| 9 |
+
- **Disk**: 100GB+ free space
|
| 10 |
+
- **Chrome**: Latest version (for Pinterest scraping)
|
| 11 |
+
|
| 12 |
+
## 1. Environment Setup
|
| 13 |
+
|
| 14 |
+
```bash
|
| 15 |
+
# Create conda environment
|
| 16 |
+
conda create -n campus-ai python=3.11 -y
|
| 17 |
+
conda activate campus-ai
|
| 18 |
+
|
| 19 |
+
# Install dependencies
|
| 20 |
+
pip install -r requirements.txt
|
| 21 |
+
|
| 22 |
+
# Verify GPU
|
| 23 |
+
python -c "import torch; print(f'CUDA: {torch.cuda.is_available()}, GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"N/A\"}')"
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
## 2. Configuration
|
| 27 |
+
|
| 28 |
+
Edit `configs/config.yaml`:
|
| 29 |
+
|
| 30 |
+
```yaml
|
| 31 |
+
project:
|
| 32 |
+
creator: "YOUR_NAME" # ← Change this
|
| 33 |
+
|
| 34 |
+
deployment:
|
| 35 |
+
hf_username: "YOUR_HF_USERNAME" # ← Change this
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
### API Keys
|
| 39 |
+
|
| 40 |
+
| Service | Where to Get | Config Key |
|
| 41 |
+
|---------|-------------|------------|
|
| 42 |
+
| Kaggle | kaggle.com/settings | `api_keys.kaggle` |
|
| 43 |
+
| Unsplash | unsplash.com/developers | `api_keys.unsplash` |
|
| 44 |
+
| Pexels | pexels.com/api | `api_keys.pexels` |
|
| 45 |
+
| Groq | console.groq.com | Environment: `GROQ_API_KEY` |
|
| 46 |
+
| HuggingFace | huggingface.co/settings/tokens | CLI: `huggingface-cli login` |
|
| 47 |
+
|
| 48 |
+
## 3. Data Pipeline
|
| 49 |
+
|
| 50 |
+
### Step 1: Scrape Images 🖥️ CPU (~6-12 hours)
|
| 51 |
+
|
| 52 |
+
```bash
|
| 53 |
+
python scripts/pinterest_scraper.py
|
| 54 |
+
# Or scrape a single category:
|
| 55 |
+
|
| 57 |
+
python scripts/pinterest_scraper.py --category tech_fest
|
| 58 |
+
# Or targeted top-up for specific counts:
|
| 59 |
+
python scripts/pinterest_scraper.py --category workshops/coding --target 2800
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
**Output**: `data/raw/{category}/{subcategory}/` with ~1900 images per theme
|
| 63 |
+
|
| 64 |
+
### Step 2: Quality Filter 🎮 GPU (~5 min)
|
| 65 |
+
|
| 66 |
+
```bash
|
| 67 |
+
python scripts/quality_filter.py
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
Uses GPU-accelerated sharpness detection (Laplacian via PyTorch CUDA) and color analysis. Auto-detects GPU, falls back to CPU.
|
| 71 |
+
|
| 72 |
+
**Output**: `data/processed/{category}/` with ~1300+ high-quality images per theme
|
| 73 |
+
|
| 74 |
+
### Step 3: Caption Generation 🎮 GPU (~6-12 hours)
|
| 75 |
+
|
| 76 |
+
```bash
|
| 77 |
+
python scripts/caption_generator.py
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
Florence-2 runs in float16 on GPU. Includes `campus_ai_poster` trigger word and category-aware prefixes.
|
| 81 |
+
|
| 82 |
+
**Output**: `data/final/{category}/` with image + `.txt` caption pairs + `metadata.json`
|
| 83 |
+
|
| 84 |
+
### Step 4: Dataset Split 🖥️ CPU (~1 min)
|
| 85 |
+
|
| 86 |
+
```bash
|
| 87 |
+
python scripts/split_dataset.py
|
| 88 |
+
```
|
| 89 |
+
|
| 90 |
+
Fixed counts: **1000 train / 200 val / 100 test** per theme.
|
| 91 |
+
|
| 92 |
+
**Output**: `data/train/`, `data/val/`, `data/test/`
|
| 93 |
+
|
| 94 |
+
## 4. Training 🎮 GPU (~7.5 hours total)
|
| 95 |
+
|
| 96 |
+
### Install ai-toolkit
|
| 97 |
+
|
| 98 |
+
```bash
|
| 99 |
+
git clone https://github.com/ostris/ai-toolkit.git
|
| 100 |
+
cd ai-toolkit
|
| 101 |
+
pip install -e .
|
| 102 |
+
cd ..
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
### Phase 1: Layout Pass (~3 hours)
|
| 106 |
+
|
| 107 |
+
Generates the initial configuration and trains block-in composition.
|
| 108 |
+
|
| 109 |
+
```bash
|
| 110 |
+
python scripts/create_training_config.py
|
| 111 |
+
# Outputs: configs/train_sdxl_lora.yaml
|
| 112 |
+
|
| 113 |
+
cd ai-toolkit
|
| 114 |
+
set HF_TOKEN=your_token_here
|
| 115 |
+
python run.py ../configs/train_sdxl_lora.yaml
|
| 116 |
+
cd ..
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
### Phase 2: Perfection Pass (~4.5 hours)
|
| 120 |
+
|
| 121 |
+
Uses the static `configs/train_sdxl_lora_phase2.yaml` (0.1 dropout, 2e-5 LR) to refine micro-details across the entire dataset (train/val/test).
|
| 122 |
+
|
| 123 |
+
```bash
|
| 124 |
+
cd ai-toolkit
|
| 125 |
+
set HF_TOKEN=your_token_here
|
| 126 |
+
python run.py ../configs/train_sdxl_lora_phase2.yaml
|
| 127 |
+
cd ..
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
### Monitor
|
| 131 |
+
|
| 132 |
+
```bash
|
| 133 |
+
# In a separate terminal
|
| 134 |
+
nvidia-smi -l 30
|
| 135 |
+
|
| 136 |
+
# TensorBoard
|
| 137 |
+
tensorboard --logdir logs/tensorboard
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
### Test Checkpoints
|
| 141 |
+
|
| 142 |
+
```bash
|
| 143 |
+
python scripts/test_checkpoint.py
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
## 5. Deployment 🖥️ CPU → ☁️ Cloud
|
| 147 |
+
|
| 148 |
+
### Upload LoRA to Hugging Face
|
| 149 |
+
|
| 150 |
+
```bash
|
| 151 |
+
huggingface-cli login
|
| 152 |
+
huggingface-cli upload YOUR_USERNAME/campus-ai-poster-sdxl models/sdxl/checkpoints/campus_ai_poster_sdxl/ .
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
### Create & Deploy HF Space
|
| 156 |
+
|
| 157 |
+
```bash
|
| 158 |
+
cd deployment
|
| 159 |
+
git init
|
| 160 |
+
huggingface-cli repo create campus-ai-poster-generator --type space --space-sdk gradio
|
| 161 |
+
git remote add space https://huggingface.co/spaces/YOUR_USERNAME/campus-ai-poster-generator
|
| 162 |
+
git add app.py pipelines.py prompt_engine.py requirements.txt README.md
|
| 163 |
+
git commit -m "Deploy CampusGen AI"
|
| 164 |
+
git push space main
|
| 165 |
+
```
|
| 166 |
+
|
| 167 |
+
### Configure Secrets
|
| 168 |
+
|
| 169 |
+
In Space Settings → Variables and Secrets:
|
| 170 |
+
|
| 171 |
+
| Secret Name | Value |
|
| 172 |
+
|-------------|-------|
|
| 173 |
+
| `HF_USERNAME` | your HF username |
|
| 174 |
+
| `GROQ_API_KEY` | your Groq API key |
|
| 175 |
+
|
| 176 |
+
## GPU Usage Summary
|
| 177 |
+
|
| 178 |
+
| Step | Device | Time |
|
| 179 |
+
|------|--------|------|
|
| 180 |
+
| Scraping | 🖥️ CPU | ~6-12h (network-bound) |
|
| 181 |
+
| Quality Filter | 🎮 GPU | ~5 min |
|
| 182 |
+
| Captioning | 🎮 GPU | ~6-12h |
|
| 183 |
+
| Split | 🖥️ CPU | ~1 min |
|
| 184 |
+
| Training (Phase 1) | 🎮 GPU | ~3h |
|
| 185 |
+
| Training (Phase 2) | 🎮 GPU | ~4.5h |
|
| 186 |
+
| Upload | 🖥️ CPU | ~5 min |
|
| 187 |
+
| Live Demo | ☁️ Cloud GPU | HF ZeroGPU |
|
| 188 |
+
|
| 189 |
+
## Troubleshooting
|
| 190 |
+
|
| 191 |
+
| Issue | Solution |
|
| 192 |
+
|-------|----------|
|
| 193 |
+
| CUDA OOM during training | Set `batch_size: 1` and `gradient_accumulation_steps: 4` in config |
|
| 194 |
+
| Pinterest blocking | Increase sleep time, use VPN, or try alt sources |
|
| 195 |
+
| Blurry outputs | Increase `num_inference_steps` to 40 |
|
| 196 |
+
| Slow cold start on HF | Send Space link 24h before demo to warm it up |
|
| 197 |
+
| Groq rate limit | Create multiple accounts, rotate API keys |
|
| 198 |
+
| GPU not detected | Verify CUDA install: `python -c "import torch; print(torch.cuda.is_available())"` |
|
docs/architecture.html
ADDED
|
@@ -0,0 +1,1004 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8">
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<title>Campus-AI — Architecture | CounciL</title>
|
| 8 |
+
<link
|
| 9 |
+
href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap"
|
| 10 |
+
rel="stylesheet">
|
| 11 |
+
<style>
|
| 12 |
+
:root {
|
| 13 |
+
--bg: #06070f;
|
| 14 |
+
--card: rgba(14, 17, 38, 0.85);
|
| 15 |
+
--border: rgba(120, 140, 255, 0.12);
|
| 16 |
+
--blue: #4f8ff7;
|
| 17 |
+
--purple: #8b5cf6;
|
| 18 |
+
--pink: #ec4899;
|
| 19 |
+
--green: #22d3ee;
|
| 20 |
+
--orange: #f59e0b;
|
| 21 |
+
--text: #c8d6f0;
|
| 22 |
+
--muted: #5a6488;
|
| 23 |
+
--glow-blue: rgba(79, 143, 247, 0.35);
|
| 24 |
+
--glow-purple: rgba(139, 92, 246, 0.35);
|
| 25 |
+
--glow-pink: rgba(236, 72, 153, 0.35);
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
* {
|
| 29 |
+
margin: 0;
|
| 30 |
+
padding: 0;
|
| 31 |
+
box-sizing: border-box;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
body {
|
| 35 |
+
background: var(--bg);
|
| 36 |
+
font-family: 'Space Grotesk', sans-serif;
|
| 37 |
+
color: var(--text);
|
| 38 |
+
overflow-x: hidden;
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
/* Animated background grid */
|
| 42 |
+
body::before {
|
| 43 |
+
content: '';
|
| 44 |
+
position: fixed;
|
| 45 |
+
inset: 0;
|
| 46 |
+
background:
|
| 47 |
+
linear-gradient(rgba(79, 143, 247, 0.03) 1px, transparent 1px),
|
| 48 |
+
linear-gradient(90deg, rgba(79, 143, 247, 0.03) 1px, transparent 1px);
|
| 49 |
+
background-size: 60px 60px;
|
| 50 |
+
animation: gridMove 20s linear infinite;
|
| 51 |
+
z-index: 0;
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
@keyframes gridMove {
|
| 55 |
+
0% {
|
| 56 |
+
transform: translate(0, 0);
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
100% {
|
| 60 |
+
transform: translate(60px, 60px);
|
| 61 |
+
}
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
/* Ambient orbs */
|
| 65 |
+
.orb {
|
| 66 |
+
position: fixed;
|
| 67 |
+
border-radius: 50%;
|
| 68 |
+
filter: blur(100px);
|
| 69 |
+
opacity: 0.15;
|
| 70 |
+
z-index: 0;
|
| 71 |
+
animation: float 15s ease-in-out infinite alternate;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
.orb-1 {
|
| 75 |
+
width: 500px;
|
| 76 |
+
height: 500px;
|
| 77 |
+
background: var(--blue);
|
| 78 |
+
top: -100px;
|
| 79 |
+
left: -100px;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
.orb-2 {
|
| 83 |
+
width: 400px;
|
| 84 |
+
height: 400px;
|
| 85 |
+
background: var(--purple);
|
| 86 |
+
top: 40%;
|
| 87 |
+
right: -100px;
|
| 88 |
+
animation-delay: -5s;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
.orb-3 {
|
| 92 |
+
width: 450px;
|
| 93 |
+
height: 450px;
|
| 94 |
+
background: var(--pink);
|
| 95 |
+
bottom: -100px;
|
| 96 |
+
left: 30%;
|
| 97 |
+
animation-delay: -10s;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
@keyframes float {
|
| 101 |
+
0% {
|
| 102 |
+
transform: translate(0, 0) scale(1);
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
100% {
|
| 106 |
+
transform: translate(40px, 30px) scale(1.1);
|
| 107 |
+
}
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
.container {
|
| 111 |
+
max-width: 1300px;
|
| 112 |
+
margin: 0 auto;
|
| 113 |
+
padding: 50px 30px;
|
| 114 |
+
position: relative;
|
| 115 |
+
z-index: 1;
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
/* ═══ HEADER ═══ */
|
| 119 |
+
.header {
|
| 120 |
+
text-align: center;
|
| 121 |
+
margin-bottom: 60px;
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
.header .badge {
|
| 125 |
+
display: inline-block;
|
| 126 |
+
padding: 6px 18px;
|
| 127 |
+
border-radius: 50px;
|
| 128 |
+
font-size: 0.7rem;
|
| 129 |
+
font-weight: 600;
|
| 130 |
+
letter-spacing: 2px;
|
| 131 |
+
text-transform: uppercase;
|
| 132 |
+
border: 1px solid rgba(139, 92, 246, 0.3);
|
| 133 |
+
color: var(--purple);
|
| 134 |
+
background: rgba(139, 92, 246, 0.08);
|
| 135 |
+
margin-bottom: 20px;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
.header h1 {
|
| 139 |
+
font-size: 3rem;
|
| 140 |
+
font-weight: 700;
|
| 141 |
+
line-height: 1.1;
|
| 142 |
+
background: linear-gradient(135deg, #fff 0%, #4f8ff7 40%, #8b5cf6 60%, #ec4899 100%);
|
| 143 |
+
-webkit-background-clip: text;
|
| 144 |
+
-webkit-text-fill-color: transparent;
|
| 145 |
+
margin-bottom: 12px;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
.header p {
|
| 149 |
+
color: var(--muted);
|
| 150 |
+
font-size: 1.05rem;
|
| 151 |
+
max-width: 500px;
|
| 152 |
+
margin: 0 auto;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
/* ═══ PHASE SECTIONS ═══ */
|
| 156 |
+
.phase {
|
| 157 |
+
margin-bottom: 24px;
|
| 158 |
+
position: relative;
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
.phase-header {
|
| 162 |
+
display: flex;
|
| 163 |
+
align-items: center;
|
| 164 |
+
gap: 14px;
|
| 165 |
+
margin-bottom: 24px;
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
.phase-number {
|
| 169 |
+
width: 36px;
|
| 170 |
+
height: 36px;
|
| 171 |
+
border-radius: 10px;
|
| 172 |
+
display: flex;
|
| 173 |
+
align-items: center;
|
| 174 |
+
justify-content: center;
|
| 175 |
+
font-weight: 700;
|
| 176 |
+
font-size: 0.85rem;
|
| 177 |
+
flex-shrink: 0;
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
.p1 .phase-number {
|
| 181 |
+
background: rgba(79, 143, 247, 0.15);
|
| 182 |
+
color: var(--blue);
|
| 183 |
+
border: 1px solid rgba(79, 143, 247, 0.3);
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
.p2 .phase-number {
|
| 187 |
+
background: rgba(139, 92, 246, 0.15);
|
| 188 |
+
color: var(--purple);
|
| 189 |
+
border: 1px solid rgba(139, 92, 246, 0.3);
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
.p3 .phase-number {
|
| 193 |
+
background: rgba(236, 72, 153, 0.15);
|
| 194 |
+
color: var(--pink);
|
| 195 |
+
border: 1px solid rgba(236, 72, 153, 0.3);
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
.phase-title {
|
| 199 |
+
font-size: 1.1rem;
|
| 200 |
+
font-weight: 600;
|
| 201 |
+
color: #fff;
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
.phase-desc {
|
| 205 |
+
font-size: 0.78rem;
|
| 206 |
+
color: var(--muted);
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
/* ═══ FLOW (horizontal cards with arrows) ═══ */
|
| 210 |
+
.flow {
|
| 211 |
+
display: flex;
|
| 212 |
+
align-items: center;
|
| 213 |
+
gap: 0;
|
| 214 |
+
overflow-x: auto;
|
| 215 |
+
padding-bottom: 8px;
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
.card {
|
| 219 |
+
background: var(--card);
|
| 220 |
+
border: 1px solid var(--border);
|
| 221 |
+
border-radius: 16px;
|
| 222 |
+
padding: 24px 22px;
|
| 223 |
+
min-width: 200px;
|
| 224 |
+
backdrop-filter: blur(20px);
|
| 225 |
+
transition: all 0.35s cubic-bezier(0.4, 0, 0.2, 1);
|
| 226 |
+
position: relative;
|
| 227 |
+
overflow: hidden;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
.card::before {
|
| 231 |
+
content: '';
|
| 232 |
+
position: absolute;
|
| 233 |
+
top: 0;
|
| 234 |
+
left: 0;
|
| 235 |
+
right: 0;
|
| 236 |
+
height: 2px;
|
| 237 |
+
border-radius: 16px 16px 0 0;
|
| 238 |
+
opacity: 0;
|
| 239 |
+
transition: opacity 0.35s;
|
| 240 |
+
}
|
| 241 |
+
|
| 242 |
+
.card:hover {
|
| 243 |
+
transform: translateY(-4px);
|
| 244 |
+
border-color: rgba(120, 140, 255, 0.3);
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
.card:hover::before {
|
| 248 |
+
opacity: 1;
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
.p1 .card::before {
|
| 252 |
+
background: linear-gradient(90deg, var(--blue), var(--green));
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
.p1 .card:hover {
|
| 256 |
+
box-shadow: 0 8px 40px rgba(79, 143, 247, 0.12);
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
.p2 .card::before {
|
| 260 |
+
background: linear-gradient(90deg, var(--purple), var(--blue));
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
.p2 .card:hover {
|
| 264 |
+
box-shadow: 0 8px 40px rgba(139, 92, 246, 0.12);
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
.p3 .card::before {
|
| 268 |
+
background: linear-gradient(90deg, var(--pink), var(--orange));
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
.p3 .card:hover {
|
| 272 |
+
box-shadow: 0 8px 40px rgba(236, 72, 153, 0.12);
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
.card-icon {
|
| 276 |
+
font-size: 2rem;
|
| 277 |
+
margin-bottom: 12px;
|
| 278 |
+
display: block;
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
.card-name {
|
| 282 |
+
font-size: 0.92rem;
|
| 283 |
+
font-weight: 600;
|
| 284 |
+
color: #fff;
|
| 285 |
+
margin-bottom: 6px;
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
.card-detail {
|
| 289 |
+
font-size: 0.73rem;
|
| 290 |
+
color: var(--muted);
|
| 291 |
+
line-height: 1.5;
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
.card-tag {
|
| 295 |
+
display: inline-block;
|
| 296 |
+
margin-top: 10px;
|
| 297 |
+
padding: 3px 10px;
|
| 298 |
+
border-radius: 6px;
|
| 299 |
+
font-size: 0.62rem;
|
| 300 |
+
font-weight: 600;
|
| 301 |
+
font-family: 'JetBrains Mono', monospace;
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
/* Flow arrows */
|
| 305 |
+
.flow-arrow {
|
| 306 |
+
display: flex;
|
| 307 |
+
align-items: center;
|
| 308 |
+
justify-content: center;
|
| 309 |
+
padding: 0 6px;
|
| 310 |
+
flex-shrink: 0;
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
.flow-arrow svg {
|
| 314 |
+
width: 40px;
|
| 315 |
+
height: 20px;
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
.flow-arrow line,
|
| 319 |
+
.flow-arrow polyline {
|
| 320 |
+
stroke: var(--muted);
|
| 321 |
+
stroke-width: 1.5;
|
| 322 |
+
fill: none;
|
| 323 |
+
stroke-dasharray: 4 3;
|
| 324 |
+
animation: dashFlow 1.5s linear infinite;
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
@keyframes dashFlow {
|
| 328 |
+
0% {
|
| 329 |
+
stroke-dashoffset: 0;
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
100% {
|
| 333 |
+
stroke-dashoffset: -14;
|
| 334 |
+
}
|
| 335 |
+
}
|
| 336 |
+
|
| 337 |
+
/* Big down arrow between phases */
|
| 338 |
+
.phase-connector {
|
| 339 |
+
display: flex;
|
| 340 |
+
justify-content: center;
|
| 341 |
+
padding: 16px 0;
|
| 342 |
+
}
|
| 343 |
+
|
| 344 |
+
.phase-connector svg {
|
| 345 |
+
width: 24px;
|
| 346 |
+
height: 50px;
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
.phase-connector line {
|
| 350 |
+
stroke: rgba(139, 92, 246, 0.3);
|
| 351 |
+
stroke-width: 1.5;
|
| 352 |
+
stroke-dasharray: 4 3;
|
| 353 |
+
animation: dashDown 1.5s linear infinite;
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
.phase-connector polygon {
|
| 357 |
+
fill: rgba(139, 92, 246, 0.4);
|
| 358 |
+
}
|
| 359 |
+
|
| 360 |
+
@keyframes dashDown {
|
| 361 |
+
0% {
|
| 362 |
+
stroke-dashoffset: 0;
|
| 363 |
+
}
|
| 364 |
+
|
| 365 |
+
100% {
|
| 366 |
+
stroke-dashoffset: -14;
|
| 367 |
+
}
|
| 368 |
+
}
|
| 369 |
+
|
| 370 |
+
/* ═══ TRAINING - special 3-col layout ═══ */
|
| 371 |
+
.training-layout {
|
| 372 |
+
display: grid;
|
| 373 |
+
grid-template-columns: 1fr 1.8fr 1fr;
|
| 374 |
+
gap: 20px;
|
| 375 |
+
align-items: start;
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
.train-core {
|
| 379 |
+
background: linear-gradient(145deg, rgba(30, 20, 60, 0.9), rgba(14, 10, 35, 0.95));
|
| 380 |
+
border: 1.5px solid rgba(139, 92, 246, 0.25);
|
| 381 |
+
border-radius: 20px;
|
| 382 |
+
padding: 32px 28px;
|
| 383 |
+
text-align: center;
|
| 384 |
+
position: relative;
|
| 385 |
+
overflow: hidden;
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
.train-core::after {
|
| 389 |
+
content: '';
|
| 390 |
+
position: absolute;
|
| 391 |
+
inset: -1px;
|
| 392 |
+
border-radius: 20px;
|
| 393 |
+
background: linear-gradient(135deg, rgba(139, 92, 246, 0.15), transparent 50%, rgba(79, 143, 247, 0.1));
|
| 394 |
+
z-index: 0;
|
| 395 |
+
pointer-events: none;
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
.train-core>* {
|
| 399 |
+
position: relative;
|
| 400 |
+
z-index: 1;
|
| 401 |
+
}
|
| 402 |
+
|
| 403 |
+
.train-core .card-icon {
|
| 404 |
+
font-size: 3rem;
|
| 405 |
+
}
|
| 406 |
+
|
| 407 |
+
.train-core .card-name {
|
| 408 |
+
font-size: 1.3rem;
|
| 409 |
+
color: var(--purple);
|
| 410 |
+
}
|
| 411 |
+
|
| 412 |
+
.lora-badge {
|
| 413 |
+
display: inline-block;
|
| 414 |
+
margin-top: 14px;
|
| 415 |
+
padding: 8px 20px;
|
| 416 |
+
border-radius: 10px;
|
| 417 |
+
background: rgba(139, 92, 246, 0.12);
|
| 418 |
+
border: 1px solid rgba(139, 92, 246, 0.25);
|
| 419 |
+
font-family: 'JetBrains Mono', monospace;
|
| 420 |
+
font-size: 0.75rem;
|
| 421 |
+
color: var(--purple);
|
| 422 |
+
}
|
| 423 |
+
|
| 424 |
+
.train-specs {
|
| 425 |
+
display: grid;
|
| 426 |
+
grid-template-columns: 1fr 1fr;
|
| 427 |
+
gap: 8px;
|
| 428 |
+
margin-top: 16px;
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
.spec {
|
| 432 |
+
background: rgba(139, 92, 246, 0.06);
|
| 433 |
+
border-radius: 8px;
|
| 434 |
+
padding: 8px 10px;
|
| 435 |
+
text-align: center;
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
.spec-val {
|
| 439 |
+
font-family: 'JetBrains Mono', monospace;
|
| 440 |
+
font-size: 0.8rem;
|
| 441 |
+
font-weight: 600;
|
| 442 |
+
color: #fff;
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
.spec-label {
|
| 446 |
+
font-size: 0.6rem;
|
| 447 |
+
color: var(--muted);
|
| 448 |
+
text-transform: uppercase;
|
| 449 |
+
letter-spacing: 0.5px;
|
| 450 |
+
}
|
| 451 |
+
|
| 452 |
+
.side-stack {
|
| 453 |
+
display: flex;
|
| 454 |
+
flex-direction: column;
|
| 455 |
+
gap: 12px;
|
| 456 |
+
}
|
| 457 |
+
|
| 458 |
+
.side-card {
|
| 459 |
+
background: var(--card);
|
| 460 |
+
border: 1px solid var(--border);
|
| 461 |
+
border-radius: 12px;
|
| 462 |
+
padding: 14px 16px;
|
| 463 |
+
backdrop-filter: blur(20px);
|
| 464 |
+
transition: all 0.3s;
|
| 465 |
+
}
|
| 466 |
+
|
| 467 |
+
.side-card:hover {
|
| 468 |
+
border-color: rgba(139, 92, 246, 0.3);
|
| 469 |
+
transform: translateX(4px);
|
| 470 |
+
}
|
| 471 |
+
|
| 472 |
+
.side-card-title {
|
| 473 |
+
display: flex;
|
| 474 |
+
align-items: center;
|
| 475 |
+
gap: 8px;
|
| 476 |
+
font-size: 0.82rem;
|
| 477 |
+
font-weight: 600;
|
| 478 |
+
color: #fff;
|
| 479 |
+
margin-bottom: 4px;
|
| 480 |
+
}
|
| 481 |
+
|
| 482 |
+
.side-card-title .emoji {
|
| 483 |
+
font-size: 1.1rem;
|
| 484 |
+
}
|
| 485 |
+
|
| 486 |
+
.side-card-detail {
|
| 487 |
+
font-size: 0.68rem;
|
| 488 |
+
color: var(--muted);
|
| 489 |
+
line-height: 1.5;
|
| 490 |
+
padding-left: 28px;
|
| 491 |
+
}
|
| 492 |
+
|
| 493 |
+
/* ═══ INFERENCE - 3 col ═══ */
|
| 494 |
+
.inference-layout {
|
| 495 |
+
display: grid;
|
| 496 |
+
grid-template-columns: 240px 1fr 200px;
|
| 497 |
+
gap: 20px;
|
| 498 |
+
align-items: center;
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
.inf-input {
|
| 502 |
+
display: flex;
|
| 503 |
+
flex-direction: column;
|
| 504 |
+
gap: 14px;
|
| 505 |
+
}
|
| 506 |
+
|
| 507 |
+
.inf-output {
|
| 508 |
+
display: flex;
|
| 509 |
+
flex-direction: column;
|
| 510 |
+
gap: 14px;
|
| 511 |
+
}
|
| 512 |
+
|
| 513 |
+
.inf-engine {
|
| 514 |
+
background: linear-gradient(145deg, rgba(50, 15, 40, 0.85), rgba(20, 8, 25, 0.9));
|
| 515 |
+
border: 1.5px solid rgba(236, 72, 153, 0.2);
|
| 516 |
+
border-radius: 20px;
|
| 517 |
+
padding: 28px 24px;
|
| 518 |
+
position: relative;
|
| 519 |
+
overflow: hidden;
|
| 520 |
+
}
|
| 521 |
+
|
| 522 |
+
.inf-engine::after {
|
| 523 |
+
content: '';
|
| 524 |
+
position: absolute;
|
| 525 |
+
inset: -1px;
|
| 526 |
+
border-radius: 20px;
|
| 527 |
+
background: linear-gradient(135deg, rgba(236, 72, 153, 0.1), transparent 50%, rgba(245, 158, 11, 0.08));
|
| 528 |
+
z-index: 0;
|
| 529 |
+
pointer-events: none;
|
| 530 |
+
}
|
| 531 |
+
|
| 532 |
+
.inf-engine>* {
|
| 533 |
+
position: relative;
|
| 534 |
+
z-index: 1;
|
| 535 |
+
}
|
| 536 |
+
|
| 537 |
+
.engine-label {
|
| 538 |
+
text-align: center;
|
| 539 |
+
font-size: 0.72rem;
|
| 540 |
+
text-transform: uppercase;
|
| 541 |
+
letter-spacing: 2px;
|
| 542 |
+
color: var(--pink);
|
| 543 |
+
font-weight: 600;
|
| 544 |
+
margin-bottom: 18px;
|
| 545 |
+
}
|
| 546 |
+
|
| 547 |
+
.modes {
|
| 548 |
+
display: grid;
|
| 549 |
+
grid-template-columns: 1fr 1fr;
|
| 550 |
+
gap: 10px;
|
| 551 |
+
}
|
| 552 |
+
|
| 553 |
+
.mode {
|
| 554 |
+
background: rgba(236, 72, 153, 0.06);
|
| 555 |
+
border: 1px solid rgba(236, 72, 153, 0.12);
|
| 556 |
+
border-radius: 12px;
|
| 557 |
+
padding: 16px 12px;
|
| 558 |
+
text-align: center;
|
| 559 |
+
transition: all 0.3s;
|
| 560 |
+
}
|
| 561 |
+
|
| 562 |
+
.mode:hover {
|
| 563 |
+
background: rgba(236, 72, 153, 0.12);
|
| 564 |
+
border-color: rgba(236, 72, 153, 0.3);
|
| 565 |
+
transform: scale(1.03);
|
| 566 |
+
}
|
| 567 |
+
|
| 568 |
+
.mode-icon {
|
| 569 |
+
font-size: 1.4rem;
|
| 570 |
+
margin-bottom: 6px;
|
| 571 |
+
}
|
| 572 |
+
|
| 573 |
+
.mode-name {
|
| 574 |
+
font-size: 0.78rem;
|
| 575 |
+
font-weight: 600;
|
| 576 |
+
color: #fff;
|
| 577 |
+
}
|
| 578 |
+
|
| 579 |
+
.mode-sub {
|
| 580 |
+
font-size: 0.62rem;
|
| 581 |
+
color: var(--muted);
|
| 582 |
+
margin-top: 2px;
|
| 583 |
+
}
|
| 584 |
+
|
| 585 |
+
.engine-footer {
|
| 586 |
+
text-align: center;
|
| 587 |
+
margin-top: 14px;
|
| 588 |
+
font-size: 0.65rem;
|
| 589 |
+
color: var(--muted);
|
| 590 |
+
font-family: 'JetBrains Mono', monospace;
|
| 591 |
+
}
|
| 592 |
+
|
| 593 |
+
/* Result card glow */
|
| 594 |
+
.result-card {
|
| 595 |
+
border-color: rgba(34, 211, 238, 0.25) !important;
|
| 596 |
+
}
|
| 597 |
+
|
| 598 |
+
.result-card:hover {
|
| 599 |
+
box-shadow: 0 8px 40px rgba(34, 211, 238, 0.12) !important;
|
| 600 |
+
}
|
| 601 |
+
|
| 602 |
+
.result-card .card-name {
|
| 603 |
+
color: var(--green);
|
| 604 |
+
}
|
| 605 |
+
|
| 606 |
+
/* ═══ DEPLOY BAR ═══ */
|
| 607 |
+
.deploy {
|
| 608 |
+
display: flex;
|
| 609 |
+
gap: 16px;
|
| 610 |
+
justify-content: center;
|
| 611 |
+
margin-top: 28px;
|
| 612 |
+
flex-wrap: wrap;
|
| 613 |
+
}
|
| 614 |
+
|
| 615 |
+
.deploy-chip {
|
| 616 |
+
display: flex;
|
| 617 |
+
align-items: center;
|
| 618 |
+
gap: 8px;
|
| 619 |
+
padding: 10px 22px;
|
| 620 |
+
border-radius: 50px;
|
| 621 |
+
font-size: 0.78rem;
|
| 622 |
+
font-weight: 500;
|
| 623 |
+
transition: all 0.3s;
|
| 624 |
+
}
|
| 625 |
+
|
| 626 |
+
.deploy-chip:hover {
|
| 627 |
+
transform: scale(1.05);
|
| 628 |
+
}
|
| 629 |
+
|
| 630 |
+
.chip-local {
|
| 631 |
+
background: rgba(34, 211, 238, 0.08);
|
| 632 |
+
border: 1px solid rgba(34, 211, 238, 0.2);
|
| 633 |
+
color: var(--green);
|
| 634 |
+
}
|
| 635 |
+
|
| 636 |
+
.chip-cloud {
|
| 637 |
+
background: rgba(79, 143, 247, 0.08);
|
| 638 |
+
border: 1px solid rgba(79, 143, 247, 0.2);
|
| 639 |
+
color: var(--blue);
|
| 640 |
+
}
|
| 641 |
+
|
| 642 |
+
/* ═══ STATS BAR ═══ */
|
| 643 |
+
.stats-bar {
|
| 644 |
+
display: flex;
|
| 645 |
+
justify-content: center;
|
| 646 |
+
gap: 40px;
|
| 647 |
+
margin-top: 50px;
|
| 648 |
+
padding: 30px 0;
|
| 649 |
+
border-top: 1px solid var(--border);
|
| 650 |
+
flex-wrap: wrap;
|
| 651 |
+
}
|
| 652 |
+
|
| 653 |
+
.stat {
|
| 654 |
+
text-align: center;
|
| 655 |
+
}
|
| 656 |
+
|
| 657 |
+
.stat-value {
|
| 658 |
+
font-size: 1.6rem;
|
| 659 |
+
font-weight: 700;
|
| 660 |
+
font-family: 'JetBrains Mono', monospace;
|
| 661 |
+
background: linear-gradient(135deg, var(--blue), var(--purple));
|
| 662 |
+
-webkit-background-clip: text;
|
| 663 |
+
-webkit-text-fill-color: transparent;
|
| 664 |
+
}
|
| 665 |
+
|
| 666 |
+
.stat-label {
|
| 667 |
+
font-size: 0.65rem;
|
| 668 |
+
color: var(--muted);
|
| 669 |
+
text-transform: uppercase;
|
| 670 |
+
letter-spacing: 1.5px;
|
| 671 |
+
margin-top: 4px;
|
| 672 |
+
}
|
| 673 |
+
|
| 674 |
+
.footer {
|
| 675 |
+
text-align: center;
|
| 676 |
+
margin-top: 40px;
|
| 677 |
+
font-size: 0.72rem;
|
| 678 |
+
color: var(--muted);
|
| 679 |
+
}
|
| 680 |
+
|
| 681 |
+
.footer span {
|
| 682 |
+
color: var(--pink);
|
| 683 |
+
}
|
| 684 |
+
|
| 685 |
+
/* Responsive */
|
| 686 |
+
@media (max-width: 900px) {
|
| 687 |
+
|
| 688 |
+
.training-layout,
|
| 689 |
+
.inference-layout {
|
| 690 |
+
grid-template-columns: 1fr;
|
| 691 |
+
}
|
| 692 |
+
|
| 693 |
+
.flow {
|
| 694 |
+
flex-wrap: wrap;
|
| 695 |
+
justify-content: center;
|
| 696 |
+
}
|
| 697 |
+
|
| 698 |
+
.flow-arrow {
|
| 699 |
+
transform: rotate(90deg);
|
| 700 |
+
}
|
| 701 |
+
}
|
| 702 |
+
</style>
|
| 703 |
+
</head>
|
| 704 |
+
|
| 705 |
+
<body>
|
| 706 |
+
|
| 707 |
+
<div class="orb orb-1"></div>
|
| 708 |
+
<div class="orb orb-2"></div>
|
| 709 |
+
<div class="orb orb-3"></div>
|
| 710 |
+
|
| 711 |
+
<div class="container">
|
| 712 |
+
|
| 713 |
+
<!-- HEADER -->
|
| 714 |
+
<div class="header">
|
| 715 |
+
<div class="badge">System Architecture</div>
|
| 716 |
+
<h1>Campus-AI</h1>
|
| 717 |
+
<p style="color:var(--purple); font-size:0.85rem; font-weight:600; margin-bottom:8px;">by CounciL</p>
|
| 718 |
+
<p>End-to-end AI pipeline that scrapes, curates, trains, and generates campus event posters</p>
|
| 719 |
+
</div>
|
| 720 |
+
|
| 721 |
+
<!-- ═══════════════════════════════════════════ -->
|
| 722 |
+
<!-- PHASE 1: DATA PIPELINE -->
|
| 723 |
+
<!-- ═══════════════════════════════════════════ -->
|
| 724 |
+
<div class="phase p1">
|
| 725 |
+
<div class="phase-header">
|
| 726 |
+
<div class="phase-number">01</div>
|
| 727 |
+
<div>
|
| 728 |
+
<div class="phase-title">Data Pipeline</div>
|
| 729 |
+
<div class="phase-desc">Collect → Filter → Caption → Split</div>
|
| 730 |
+
</div>
|
| 731 |
+
</div>
|
| 732 |
+
|
| 733 |
+
<div class="flow">
|
| 734 |
+
<div class="card">
|
| 735 |
+
<span class="card-icon">🕷️</span>
|
| 736 |
+
<div class="card-name">Pinterest Scraper</div>
|
| 737 |
+
<div class="card-detail">Selenium headless browser with automatic scrolling & perceptual hash
|
| 738 |
+
deduplication</div>
|
| 739 |
+
<div class="card-tag" style="background:rgba(79,143,247,0.1); color:var(--blue);">57 subcategories × 1,900
|
| 740 |
+
</div>
|
| 741 |
+
</div>
|
| 742 |
+
|
| 743 |
+
<div class="flow-arrow"><svg>
|
| 744 |
+
<line x1="0" y1="10" x2="32" y2="10" />
|
| 745 |
+
<polyline points="30,6 36,10 30,14" />
|
| 746 |
+
</svg></div>
|
| 747 |
+
|
| 748 |
+
<div class="card">
|
| 749 |
+
<span class="card-icon">🔬</span>
|
| 750 |
+
<div class="card-name">Quality Filter</div>
|
| 751 |
+
<div class="card-detail">GPU-accelerated Laplacian sharpness, resolution, aspect ratio & color diversity
|
| 752 |
+
checks</div>
|
| 753 |
+
<div class="card-tag" style="background:rgba(34,211,238,0.1); color:var(--green);">~68% pass rate</div>
|
| 754 |
+
</div>
|
| 755 |
+
|
| 756 |
+
<div class="flow-arrow"><svg>
|
| 757 |
+
<line x1="0" y1="10" x2="32" y2="10" />
|
| 758 |
+
<polyline points="30,6 36,10 30,14" />
|
| 759 |
+
</svg></div>
|
| 760 |
+
|
| 761 |
+
<div class="card">
|
| 762 |
+
<span class="card-icon">📝</span>
|
| 763 |
+
<div class="card-name">Florence-2 Captioner</div>
|
| 764 |
+
<div class="card-detail">Microsoft Florence-2-large generates detailed captions in bf16 with torch.compile
|
| 765 |
+
</div>
|
| 766 |
+
<div class="card-tag" style="background:rgba(139,92,246,0.1); color:var(--purple);">SM120 optimized</div>
|
| 767 |
+
</div>
|
| 768 |
+
|
| 769 |
+
<div class="flow-arrow"><svg>
|
| 770 |
+
<line x1="0" y1="10" x2="32" y2="10" />
|
| 771 |
+
<polyline points="30,6 36,10 30,14" />
|
| 772 |
+
</svg></div>
|
| 773 |
+
|
| 774 |
+
<div class="card">
|
| 775 |
+
<span class="card-icon">✂️</span>
|
| 776 |
+
<div class="card-name">Dataset Splitter</div>
|
| 777 |
+
<div class="card-detail">Stratified splitting by category into training, validation & test sets</div>
|
| 778 |
+
<div class="card-tag" style="background:rgba(245,158,11,0.1); color:var(--orange);">~55K train images</div>
|
| 779 |
+
</div>
|
| 780 |
+
</div>
|
| 781 |
+
</div>
|
| 782 |
+
|
| 783 |
+
<!-- Connector -->
|
| 784 |
+
<div class="phase-connector">
|
| 785 |
+
<svg>
|
| 786 |
+
<line x1="12" y1="0" x2="12" y2="40" />
|
| 787 |
+
<polygon points="6,40 12,50 18,40" />
|
| 788 |
+
</svg>
|
| 789 |
+
</div>
|
| 790 |
+
|
| 791 |
+
<!-- ═══════════════════════════════════════════ -->
|
| 792 |
+
<!-- PHASE 2: TRAINING -->
|
| 793 |
+
<!-- ═══════════════════════════════════════════ -->
|
| 794 |
+
<div class="phase p2">
|
| 795 |
+
<div class="phase-header">
|
| 796 |
+
<div class="phase-number">02</div>
|
| 797 |
+
<div>
|
| 798 |
+
<div class="phase-title">Training Pipeline</div>
|
| 799 |
+
<div class="phase-desc">Fine-tune Flux.1-dev with LoRA adapters</div>
|
| 800 |
+
</div>
|
| 801 |
+
</div>
|
| 802 |
+
|
| 803 |
+
<div class="training-layout">
|
| 804 |
+
<!-- Left: Optimizer & Loss -->
|
| 805 |
+
<div class="side-stack">
|
| 806 |
+
<div class="side-card">
|
| 807 |
+
<div class="side-card-title"><span class="emoji">⚡</span> Prodigy Optimizer</div>
|
| 808 |
+
<div class="side-card-detail">Self-adapting LR = 1.0<br>No manual LR tuning needed</div>
|
| 809 |
+
</div>
|
| 810 |
+
<div class="side-card">
|
| 811 |
+
<div class="side-card-title"><span class="emoji">📉</span> Min-SNR-γ Loss</div>
|
| 812 |
+
<div class="side-card-detail">γ = 5.0 — balanced learning<br>across all noise levels</div>
|
| 813 |
+
</div>
|
| 814 |
+
<div class="side-card">
|
| 815 |
+
<div class="side-card-title"><span class="emoji">🔄</span> Cosine Warm Restarts</div>
|
| 816 |
+
<div class="side-card-detail">3 cycles over 4 epochs<br>escapes local minima</div>
|
| 817 |
+
</div>
|
| 818 |
+
</div>
|
| 819 |
+
|
| 820 |
+
<!-- Center: Core model -->
|
| 821 |
+
<div class="train-core">
|
| 822 |
+
<span class="card-icon">🧠</span>
|
| 823 |
+
<div class="card-name">Flux.1-dev</div>
|
| 824 |
+
<div class="card-detail" style="margin-top:8px;">12 billion parameter<br>transformer diffusion model</div>
|
| 825 |
+
<div class="lora-badge">+ LoRA Adapter (Rank 16, α=16)</div>
|
| 826 |
+
<div class="train-specs">
|
| 827 |
+
<div class="spec">
|
| 828 |
+
<div class="spec-val">40M</div>
|
| 829 |
+
<div class="spec-label">Trainable Params</div>
|
| 830 |
+
</div>
|
| 831 |
+
<div class="spec">
|
| 832 |
+
<div class="spec-val">bf16</div>
|
| 833 |
+
<div class="spec-label">Precision</div>
|
| 834 |
+
</div>
|
| 835 |
+
<div class="spec">
|
| 836 |
+
<div class="spec-val">4</div>
|
| 837 |
+
<div class="spec-label">Eff. Batch Size</div>
|
| 838 |
+
</div>
|
| 839 |
+
<div class="spec">
|
| 840 |
+
<div class="spec-val">~55K</div>
|
| 841 |
+
<div class="spec-label">Optimizer Steps</div>
|
| 842 |
+
</div>
|
| 843 |
+
</div>
|
| 844 |
+
</div>
|
| 845 |
+
|
| 846 |
+
<!-- Right: Anti-overfitting & Hardware -->
|
| 847 |
+
<div class="side-stack">
|
| 848 |
+
<div class="side-card">
|
| 849 |
+
<div class="side-card-title"><span class="emoji">🛡️</span> Anti-Overfitting</div>
|
| 850 |
+
<div class="side-card-detail">Caption dropout 10%<br>LoRA dropout 8%<br>L2 weight decay 0.01</div>
|
| 851 |
+
</div>
|
| 852 |
+
<div class="side-card">
|
| 853 |
+
<div class="side-card-title"><span class="emoji">⚙️</span> LoRA+ (ICML '24)</div>
|
| 854 |
+
<div class="side-card-detail">B matrix gets 16× higher LR<br>Free +2% accuracy boost</div>
|
| 855 |
+
</div>
|
| 856 |
+
<div class="side-card">
|
| 857 |
+
<div class="side-card-title"><span class="emoji">🖥️</span> SM120 Blackwell</div>
|
| 858 |
+
<div class="side-card-detail">TF32 tensor cores<br>torch.compile max-autotune</div>
|
| 859 |
+
</div>
|
| 860 |
+
</div>
|
| 861 |
+
</div>
|
| 862 |
+
</div>
|
| 863 |
+
|
| 864 |
+
<!-- Connector -->
|
| 865 |
+
<div class="phase-connector">
|
| 866 |
+
<svg>
|
| 867 |
+
<line x1="12" y1="0" x2="12" y2="40" />
|
| 868 |
+
<polygon points="6,40 12,50 18,40" />
|
| 869 |
+
</svg>
|
| 870 |
+
</div>
|
| 871 |
+
|
| 872 |
+
<!-- ═══════════════════════════════════════════ -->
|
| 873 |
+
<!-- PHASE 3: INFERENCE & DEPLOYMENT -->
|
| 874 |
+
<!-- ═══════════════════════════════════════════ -->
|
| 875 |
+
<div class="phase p3">
|
| 876 |
+
<div class="phase-header">
|
| 877 |
+
<div class="phase-number">03</div>
|
| 878 |
+
<div>
|
| 879 |
+
<div class="phase-title">Inference & Deployment</div>
|
| 880 |
+
<div class="phase-desc">Prompt → Generate → Upscale → Deliver</div>
|
| 881 |
+
</div>
|
| 882 |
+
</div>
|
| 883 |
+
|
| 884 |
+
<div class="inference-layout">
|
| 885 |
+
<!-- Left: input -->
|
| 886 |
+
<div class="inf-input">
|
| 887 |
+
<div class="card">
|
| 888 |
+
<span class="card-icon">👤</span>
|
| 889 |
+
<div class="card-name">User Input</div>
|
| 890 |
+
<div class="card-detail">Event description, type, visual style & resolution preset</div>
|
| 891 |
+
</div>
|
| 892 |
+
|
| 893 |
+
<div style="text-align:center;">
|
| 894 |
+
<svg width="24" height="30">
|
| 895 |
+
<line x1="12" y1="0" x2="12" y2="22" stroke="var(--muted)" stroke-width="1.5" stroke-dasharray="4 3">
|
| 896 |
+
<animate attributeName="stroke-dashoffset" from="0" to="-14" dur="1.5s" repeatCount="indefinite" />
|
| 897 |
+
</line>
|
| 898 |
+
<polygon points="6,22 12,30 18,22" fill="var(--muted)" opacity="0.5" />
|
| 899 |
+
</svg>
|
| 900 |
+
</div>
|
| 901 |
+
|
| 902 |
+
<div class="card">
|
| 903 |
+
<span class="card-icon">🦙</span>
|
| 904 |
+
<div class="card-name">Groq Llama 3.3 70B</div>
|
| 905 |
+
<div class="card-detail">Enhances plain text into detailed Flux-optimized prompts</div>
|
| 906 |
+
<div class="card-tag" style="background:rgba(245,158,11,0.1); color:var(--orange);">~200ms API</div>
|
| 907 |
+
</div>
|
| 908 |
+
</div>
|
| 909 |
+
|
| 910 |
+
<!-- Center: engine -->
|
| 911 |
+
<div class="inf-engine">
|
| 912 |
+
<div class="engine-label">Flux.1-dev + LoRA Inference Engine</div>
|
| 913 |
+
<div class="modes">
|
| 914 |
+
<div class="mode">
|
| 915 |
+
<div class="mode-icon">✍️</div>
|
| 916 |
+
<div class="mode-name">Text → Poster</div>
|
| 917 |
+
<div class="mode-sub">From description only</div>
|
| 918 |
+
</div>
|
| 919 |
+
<div class="mode">
|
| 920 |
+
<div class="mode-icon">🖼️</div>
|
| 921 |
+
<div class="mode-name">Reference Style</div>
|
| 922 |
+
<div class="mode-sub">IP-Adapter transfer</div>
|
| 923 |
+
</div>
|
| 924 |
+
<div class="mode">
|
| 925 |
+
<div class="mode-icon">🔄</div>
|
| 926 |
+
<div class="mode-name">Image → Image</div>
|
| 927 |
+
<div class="mode-sub">Transform existing art</div>
|
| 928 |
+
</div>
|
| 929 |
+
<div class="mode">
|
| 930 |
+
<div class="mode-icon">🎭</div>
|
| 931 |
+
<div class="mode-name">Inpainting</div>
|
| 932 |
+
<div class="mode-sub">Edit specific regions</div>
|
| 933 |
+
</div>
|
| 934 |
+
</div>
|
| 935 |
+
<div class="engine-footer">CPU offload • ~10GB peak VRAM • bf16 precision</div>
|
| 936 |
+
</div>
|
| 937 |
+
|
| 938 |
+
<!-- Right: output -->
|
| 939 |
+
<div class="inf-output">
|
| 940 |
+
<div class="card">
|
| 941 |
+
<span class="card-icon">🔎</span>
|
| 942 |
+
<div class="card-name">Real-ESRGAN 2×</div>
|
| 943 |
+
<div class="card-detail">AI upscaling for crisp HD output at any size</div>
|
| 944 |
+
</div>
|
| 945 |
+
|
| 946 |
+
<div style="text-align:center;">
|
| 947 |
+
<svg width="24" height="30">
|
| 948 |
+
<line x1="12" y1="0" x2="12" y2="22" stroke="var(--muted)" stroke-width="1.5" stroke-dasharray="4 3">
|
| 949 |
+
<animate attributeName="stroke-dashoffset" from="0" to="-14" dur="1.5s" repeatCount="indefinite" />
|
| 950 |
+
</line>
|
| 951 |
+
<polygon points="6,22 12,30 18,22" fill="var(--muted)" opacity="0.5" />
|
| 952 |
+
</svg>
|
| 953 |
+
</div>
|
| 954 |
+
|
| 955 |
+
<div class="card result-card">
|
| 956 |
+
<span class="card-icon">🎨</span>
|
| 957 |
+
<div class="card-name">Generated Poster</div>
|
| 958 |
+
<div class="card-detail">1024×1024 to 1152×768<br>Multiple variants supported</div>
|
| 959 |
+
</div>
|
| 960 |
+
</div>
|
| 961 |
+
</div>
|
| 962 |
+
|
| 963 |
+
<!-- Deploy chips -->
|
| 964 |
+
<div class="deploy">
|
| 965 |
+
<div class="deploy-chip chip-local">🖥️ Local — RTX 5070 Ti (12GB VRAM)</div>
|
| 966 |
+
<div class="deploy-chip chip-cloud">☁️ Cloud — HF Spaces + ZeroGPU</div>
|
| 967 |
+
</div>
|
| 968 |
+
</div>
|
| 969 |
+
|
| 970 |
+
<!-- ═══ STATS ═══ -->
|
| 971 |
+
<div class="stats-bar">
|
| 972 |
+
<div class="stat">
|
| 973 |
+
<div class="stat-value">71K+</div>
|
| 974 |
+
<div class="stat-label">Training Images</div>
|
| 975 |
+
</div>
|
| 976 |
+
<div class="stat">
|
| 977 |
+
<div class="stat-value">57</div>
|
| 978 |
+
<div class="stat-label">Subcategories</div>
|
| 979 |
+
</div>
|
| 980 |
+
<div class="stat">
|
| 981 |
+
<div class="stat-value">12B</div>
|
| 982 |
+
<div class="stat-label">Base Params</div>
|
| 983 |
+
</div>
|
| 984 |
+
<div class="stat">
|
| 985 |
+
<div class="stat-value">40M</div>
|
| 986 |
+
<div class="stat-label">LoRA Params</div>
|
| 987 |
+
</div>
|
| 988 |
+
<div class="stat">
|
| 989 |
+
<div class="stat-value">SM120</div>
|
| 990 |
+
<div class="stat-label">GPU Arch</div>
|
| 991 |
+
</div>
|
| 992 |
+
<div class="stat">
|
| 993 |
+
<div class="stat-value">~46h</div>
|
| 994 |
+
<div class="stat-label">Training Time</div>
|
| 995 |
+
</div>
|
| 996 |
+
</div>
|
| 997 |
+
|
| 998 |
+
<div class="footer">Campus-AI · CounciL · Built with <span>❤️</span> for the Indian campus
|
| 999 |
+
community</div>
|
| 1000 |
+
|
| 1001 |
+
</div>
|
| 1002 |
+
</body>
|
| 1003 |
+
|
| 1004 |
+
</html>
|
requirements.txt
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CampusGen AI - Requirements
|
| 2 |
+
# Python 3.11.14 | CUDA 13.0 (cu130)
|
| 3 |
+
|
| 4 |
+
# ===== PyTorch (CUDA 13.0) =====
|
| 5 |
+
--extra-index-url https://download.pytorch.org/whl/cu130
|
| 6 |
+
torch
|
| 7 |
+
torchvision
|
| 8 |
+
torchaudio
|
| 9 |
+
|
| 10 |
+
# ===== Hugging Face Ecosystem =====
|
| 11 |
+
transformers
|
| 12 |
+
diffusers
|
| 13 |
+
datasets
|
| 14 |
+
peft
|
| 15 |
+
trl
|
| 16 |
+
huggingface_hub
|
| 17 |
+
accelerate
|
| 18 |
+
safetensors
|
| 19 |
+
|
| 20 |
+
# ===== Unsloth (fast LoRA fine-tuning) =====
|
| 21 |
+
unsloth
|
| 22 |
+
|
| 23 |
+
# ===== Quantization =====
|
| 24 |
+
bitsandbytes
|
| 25 |
+
|
| 26 |
+
# ===== Evaluation Metrics =====
|
| 27 |
+
torchmetrics
|
| 28 |
+
|
| 29 |
+
# ===== Toxicity Detection =====
|
| 30 |
+
detoxify
|
| 31 |
+
|
| 32 |
+
# ===== Web UI =====
|
| 33 |
+
gradio
|
| 34 |
+
|
| 35 |
+
# ===== Evaluation Dependencies =====
|
| 36 |
+
scipy
|
| 37 |
+
open-clip-torch
|
| 38 |
+
|
| 39 |
+
# ===== Dataset Downloading =====
|
| 40 |
+
kaggle
|
| 41 |
+
|
| 42 |
+
# ===== Scraping & Web =====
|
| 43 |
+
selenium
|
| 44 |
+
webdriver-manager
|
| 45 |
+
beautifulsoup4
|
| 46 |
+
|
| 47 |
+
# ===== Image Processing =====
|
| 48 |
+
opencv-python
|
| 49 |
+
imagehash
|
| 50 |
+
|
| 51 |
+
# ===== Data & Utilities =====
|
| 52 |
+
numpy
|
| 53 |
+
Pillow
|
| 54 |
+
tqdm
|
| 55 |
+
requests
|
| 56 |
+
pyyaml
|
| 57 |
+
tensorboard
|
scripts/augment_specific_v3.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os
|
| 3 |
+
import shutil
|
| 4 |
+
import logging
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from collections import defaultdict
|
| 7 |
+
import glob
|
| 8 |
+
|
| 9 |
+
# Configure logging
|
| 10 |
+
logging.basicConfig(
|
| 11 |
+
level=logging.INFO,
|
| 12 |
+
format="%(asctime)s [%(levelname)s] %(message)s",
|
| 13 |
+
datefmt="%H:%M:%S"
|
| 14 |
+
)
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
# Try to import optional dependencies
|
| 18 |
+
try:
|
| 19 |
+
from PIL import Image
|
| 20 |
+
PIL_AVAILABLE = True
|
| 21 |
+
except ImportError:
|
| 22 |
+
PIL_AVAILABLE = False
|
| 23 |
+
logger.warning("⚠️ PIL (Pillow) not found. Image validation will be skipped (only file extension check).")
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
from tqdm import tqdm
|
| 27 |
+
TQDM_AVAILABLE = True
|
| 28 |
+
except ImportError:
|
| 29 |
+
TQDM_AVAILABLE = False
|
| 30 |
+
|
| 31 |
+
# Configuration
TARGET_COUNT = 1300  # Per-category goal; safety margin above the 1000-image minimum
# Categories found short of TARGET_COUNT that need topping up from raw scrapes.
TARGET_CATEGORIES = [
    "workshops/coding",
    "workshops/design"
]

# Dataset layout: raw scraped images live under data/raw, curated copies
# (mirroring the same category sub-paths) under data/processed.
DATA_ROOT = Path("data")
RAW_ROOT = DATA_ROOT / "raw"
PROCESSED_ROOT = DATA_ROOT / "processed"
|
| 41 |
+
|
| 42 |
+
def get_image_files(directory):
    """Recursively collect all image files under *directory*.

    Extensions are matched case-insensitively, so mixed-case names such as
    ``photo.Jpg`` are found too — the previous two-pass lower/upper glob
    only caught all-lower or all-upper suffixes.

    Args:
        directory: ``pathlib.Path`` to search. A missing directory yields
            an empty list instead of raising.

    Returns:
        Sorted, de-duplicated list of ``Path`` objects.
    """
    suffixes = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'}
    if not directory.exists():
        return []
    # Single recursive walk with a case-folded suffix check replaces one
    # glob pass per extension/case combination.
    return sorted({
        p for p in directory.rglob('*')
        if p.is_file() and p.suffix.lower() in suffixes
    })
|
| 56 |
+
|
| 57 |
+
def check_image_quality(file_path):
    """Run a lightweight sanity check on a single image file.

    When Pillow is unavailable, falls back to a bare file-size heuristic
    (anything under 5 KB is treated as suspect).

    Returns:
        Tuple of (passed: bool, message: str).
    """
    if not PIL_AVAILABLE:
        # Without Pillow we can only sanity-check the byte count.
        if file_path.stat().st_size < 5120:  # < 5KB is suspect
            return False, "File too small"
        return True, "No PIL check"

    try:
        with Image.open(file_path) as img:
            width, height = img.size
    except Exception as e:
        return False, f"Corrupt image: {str(e)}"

    # Resolution floor: both dimensions must reach 256 px.
    if min(width, height) < 256:
        return False, f"Low resolution: {width}x{height}"

    # Reject extreme portrait/landscape shapes.
    aspect = width / height
    if not (0.4 <= aspect <= 2.5):
        return False, f"Extreme aspect ratio: {aspect:.2f}"

    return True, "OK"
|
| 82 |
+
|
| 83 |
+
def process_category(relative_path):
    """Top up one category's processed folder to TARGET_COUNT images.

    Copies quality-checked candidates from ``data/raw/<category>`` into
    ``data/processed/<category>``, skipping files already present (matched
    by filename). Stops as soon as the shortfall is covered.

    Args:
        relative_path: Category path relative to the data roots,
            e.g. ``Path("workshops/coding")``.
    """
    category_name = str(relative_path).replace("\\", "/")
    logger.info(f"🔍 Checking category: {category_name}")

    raw_path = RAW_ROOT / relative_path
    processed_path = PROCESSED_ROOT / relative_path

    # Ensure processed directory exists
    processed_path.mkdir(parents=True, exist_ok=True)

    # 1. Count current processed images; filenames are the dedupe key.
    processed_files = get_image_files(processed_path)
    current_count = len(processed_files)
    processed_filenames = {f.name for f in processed_files}

    logger.info(f"   Existing processed images: {current_count}")

    if current_count >= TARGET_COUNT:
        logger.info(f"   ✅ Already met target of {TARGET_COUNT}. Skipping.")
        return

    needed = TARGET_COUNT - current_count
    logger.info(f"   ⚠️ Need {needed} more images.")

    # 2. Gather raw candidates not yet copied into processed.
    raw_files = get_image_files(raw_path)
    logger.info(f"   Found {len(raw_files)} raw images available.")

    candidates = [f for f in raw_files if f.name not in processed_filenames]
    logger.info(f"   {len(candidates)} new unique candidates available to process.")

    if not candidates:
        logger.warning("   ❌ No new candidates found in raw folder!")
        return

    # 3. Copy candidates until the shortfall is covered.
    added_count = 0
    failed_check = 0

    # Progress bar only when tqdm is installed.
    iterator = tqdm(candidates, unit="img") if TQDM_AVAILABLE else candidates

    for src_file in iterator:
        if added_count >= needed:
            break

        # Quality gate (resolution / aspect / corruption).
        is_ok, _msg = check_image_quality(src_file)
        if not is_ok:
            failed_check += 1
            continue

        dst_file = processed_path / src_file.name
        try:
            # copy2 preserves timestamps/metadata.
            shutil.copy2(src_file, dst_file)
            added_count += 1
        except Exception as e:
            logger.error(f"Failed to copy {src_file.name}: {e}")

    logger.info(f"   🎉 Added {added_count} images.")
    # Previously counted but never reported — surface rejects for visibility.
    if failed_check:
        logger.info(f"   Rejected by quality check: {failed_check}")
    logger.info(f"   Final Count: {current_count + added_count}")
    logger.info("-" * 40)
|
| 150 |
+
|
| 151 |
+
def main():
    """Entry point: bring every configured category up to TARGET_COUNT."""
    logger.info("🚀 Starting targeted dataset augmentation...")
    logger.info(f"📂 Data Root: {DATA_ROOT.absolute()}")
    logger.info(f"🎯 Target: {TARGET_COUNT} images per category")

    for category in map(Path, TARGET_CATEGORIES):
        process_category(category)

    logger.info("✨ Done.")
|
| 160 |
+
|
| 161 |
+
# Run only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
scripts/caption_generator.py
ADDED
|
@@ -0,0 +1,379 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Caption Generator — Florence-2 Native (transformers >= 4.56)
|
| 5 |
+
|
| 6 |
+
Multi-task captioning: MORE_DETAILED_CAPTION + OCR + DENSE_REGION_CAPTION
|
| 7 |
+
|
| 8 |
+
SETUP (run ONCE):
|
| 9 |
+
pip install "transformers==4.57.3" tokenizers --upgrade
|
| 10 |
+
rm -rf ~/.cache/huggingface/modules/transformers_modules/
|
| 11 |
+
|
| 12 |
+
Outputs:
|
| 13 |
+
data/{split}/{category}/image.txt
|
| 14 |
+
data/{split}/metadata.json
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import os
|
| 18 |
+
import re
|
| 19 |
+
import sys
|
| 20 |
+
import json
|
| 21 |
+
import logging
|
| 22 |
+
import argparse
|
| 23 |
+
import traceback
|
| 24 |
+
import warnings
|
| 25 |
+
from pathlib import Path
|
| 26 |
+
from datetime import datetime
|
| 27 |
+
|
| 28 |
+
import yaml
|
| 29 |
+
import torch
|
| 30 |
+
from PIL import Image, ImageFile
|
| 31 |
+
from tqdm import tqdm
|
| 32 |
+
|
| 33 |
+
Image.MAX_IMAGE_PIXELS = None
|
| 34 |
+
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
| 35 |
+
|
| 36 |
+
import transformers
|
| 37 |
+
transformers.logging.set_verbosity_error()
|
| 38 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 39 |
+
warnings.filterwarnings("ignore", category=FutureWarning)
|
| 40 |
+
warnings.filterwarnings("ignore", category=UserWarning)
|
| 41 |
+
|
| 42 |
+
if torch.cuda.is_available():
|
| 43 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
| 44 |
+
torch.backends.cudnn.allow_tf32 = True
|
| 45 |
+
|
| 46 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 47 |
+
# Logging
|
| 48 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 49 |
+
Path("logs").mkdir(exist_ok=True)
|
| 50 |
+
logging.basicConfig(
|
| 51 |
+
level=logging.INFO,
|
| 52 |
+
format="%(asctime)s [%(levelname)s] %(message)s",
|
| 53 |
+
handlers=[
|
| 54 |
+
logging.StreamHandler(),
|
| 55 |
+
logging.FileHandler("logs/caption_generator.log"),
|
| 56 |
+
],
|
| 57 |
+
)
|
| 58 |
+
logger = logging.getLogger(__name__)
|
| 59 |
+
|
| 60 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 61 |
+
# Config
|
| 62 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 63 |
+
def load_config(config_path: str = "configs/config.yaml") -> dict:
    """Parse the project YAML configuration file and return it as a dict."""
    with open(config_path, encoding="utf-8") as fh:
        config = yaml.safe_load(fh)
    return config
|
| 66 |
+
|
| 67 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 68 |
+
# Constants
|
| 69 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 70 |
+
# Hugging Face repo of the native-transformers Florence-2 port used for captioning.
MODEL_ID = "ducviet00/Florence-2-large-hf"

# Florence-2 task prompts run for every image, and the metadata keys they feed.
TASKS = ["<MORE_DETAILED_CAPTION>", "<OCR>", "<DENSE_REGION_CAPTION>"]
TASK_KEY = {
    "<MORE_DETAILED_CAPTION>": "visual",   # long free-form scene description
    "<OCR>": "ocr",                        # text rendered on the poster
    "<DENSE_REGION_CAPTION>": "regions",   # labelled layout/design regions
}
# Caption prefixes keyed by category folder name. Lookup tries the full
# "parent/sub" path first, then the parent; "general" covers split-root images.
CATEGORY_LABELS = {
    "tech_fest": "A technology fest event poster",
    "cultural_fest": "A cultural festival event poster",
    "college_events": "A college event poster",
    "sports": "A sports tournament event poster",
    "festivals": "A festival celebration event poster",
    "workshops": "A workshop or seminar event poster",
    "social": "A social awareness event poster",
    "entertainment": "An entertainment event poster",
    "styles": "A stylized event poster",
    "general": "An event poster",
    "diwali": "A Diwali celebration event poster",
    "holi": "A Holi festival event poster",
    "navratri": "A Navratri festival event poster",
    "eid": "An Eid celebration event poster",
    "ganesh": "A Ganesh Chaturthi event poster",
}
|
| 95 |
+
|
| 96 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 97 |
+
# Cache guard
|
| 98 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 99 |
+
def _check_stale_cache():
    """Warn if a leftover remote-code cache could shadow the native classes."""
    cache_dir = Path.home() / ".cache" / "huggingface" / "modules" / "transformers_modules"
    if not cache_dir.exists():
        return
    logger.warning(
        f"Stale remote-code cache at {cache_dir} — "
        "run: rm -rf ~/.cache/huggingface/modules/transformers_modules/"
    )
|
| 107 |
+
# ─────────────────────���───────────────────────────────────────────────────────
|
| 108 |
+
# Florence-2 Captioner
|
| 109 |
+
# Direct-class loading — bypasses auto_map, no Auto* classes used
|
| 110 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 111 |
+
class Florence2Captioner:
    """Multi-task Florence-2 captioner using native transformers classes.

    Loads tokenizer, image processor and model directly (bypassing the
    Auto* machinery and any ``auto_map`` remote code), then runs the three
    configured TASKS per image and merges them into one training caption.
    """

    def __init__(self, device: str = "auto"):
        """Load Florence-2 onto *device* ("auto" picks CUDA when available).

        BUGFIX: the previous one-liner
        ``"cuda" if torch.cuda.is_available() else "cpu" if device == "auto" else device``
        parsed as ``"cuda" if avail else (...)``, so an explicitly requested
        device (e.g. "cpu") was silently overridden whenever CUDA existed.
        """
        from transformers import Florence2ForConditionalGeneration, Florence2Processor
        from transformers.models.bart import BartTokenizerFast
        from transformers.models.clip.image_processing_clip import CLIPImageProcessor

        if device == "auto":
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device
        # bf16 only on CUDA; CPU inference stays in fp32.
        self.dtype = torch.bfloat16 if self.device == "cuda" else torch.float32

        logger.info(f"transformers : {transformers.__version__}")
        logger.info(f"torch : {torch.__version__}")
        logger.info(f"device/dtype : {self.device} / {self.dtype}")
        logger.info(f"Loading {MODEL_ID} ...")

        # Direct tokenizer load — bypasses AutoTokenizer & auto_map
        tokenizer = BartTokenizerFast.from_pretrained(MODEL_ID)

        # Patch image_token if missing (required by Florence2Processor.__init__)
        if not hasattr(tokenizer, "image_token") or tokenizer.image_token is None:
            tok_vocab = tokenizer.get_vocab()
            # Prefer an existing token; fall back to registering "<image>".
            image_token = next(
                (t for t in ["<image>", "</s>", "<unk>"] if t in tok_vocab), None
            )
            if image_token is None:
                tokenizer.add_tokens(["<image>"], special_tokens=True)
                image_token = "<image>"
            tokenizer.image_token = image_token
            tokenizer.image_token_id = tokenizer.convert_tokens_to_ids(image_token)
            logger.info(f"Patched image_token='{image_token}' (id={tokenizer.image_token_id})")

        # Direct image processor load — bypasses AutoImageProcessor & auto_map
        image_processor = CLIPImageProcessor.from_pretrained(MODEL_ID)

        # Assemble processor from components (bypasses from_pretrained's AutoTokenizer call)
        self.processor = Florence2Processor(
            image_processor=image_processor,
            tokenizer=tokenizer,
        )

        # Direct model load — bypasses AutoModel & auto_map in config.json
        self.model = Florence2ForConditionalGeneration.from_pretrained(
            MODEL_ID,
            torch_dtype=self.dtype,
            ignore_mismatched_sizes=False,
        ).to(self.device)
        self.model.eval()
        logger.info("Florence-2 loaded successfully.")

    # ── helpers ───────────────────────────────────────────────────────────────

    def _safe_to_device(self, inputs: dict) -> dict:
        """Move processor outputs to the target device.

        Floating tensors are also cast to ``self.dtype``; integer tensors
        (token ids) keep their dtype; non-tensors pass through unchanged.
        """
        out = {}
        for k, v in inputs.items():
            if not isinstance(v, torch.Tensor):
                out[k] = v
            elif v.is_floating_point():
                out[k] = v.to(device=self.device, dtype=self.dtype)
            else:
                out[k] = v.to(device=self.device)
        return out

    def _run_task(self, image: Image.Image, task: str) -> str:
        """Run one Florence-2 task; returns clean decoded string."""
        inputs = self.processor(text=task, images=image, return_tensors="pt")
        inputs = self._safe_to_device(inputs)

        with torch.no_grad():
            generated_ids = self.model.generate(
                input_ids=inputs["input_ids"],
                pixel_values=inputs["pixel_values"],
                max_new_tokens=512,
                num_beams=3,       # beam search for stable, deterministic output
                do_sample=False,
            )

        # Decode directly — post_process_generation raises
        # "Unsupported parse task: pure_text/description_with_bboxes"
        # in transformers 4.57.3 due to processor_config task-type mismatch.
        # Direct decoding gives identical text for all tasks we use.
        text = self.processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0].strip()

        # Strip task prompt tokens if they leaked through decode
        for tok in TASKS:
            text = text.replace(tok, "").strip()

        # DENSE_REGION_CAPTION contains <loc_NNN> coordinate tokens;
        # strip them to keep only the human-readable region labels
        if task == "<DENSE_REGION_CAPTION>":
            text = re.sub(r"<loc_\d+>", "", text)
            text = re.sub(r"\s{2,}", " ", text).strip(" ,")

        return text

    # ── public API ────────────────────────────────────────────────────────────

    def caption(self, image: Image.Image) -> dict:
        """Run all tasks; returns {visual, ocr, regions}.

        A failed task is logged and yields "" so one bad task does not
        lose the whole image.
        """
        if image.width < 16 or image.height < 16:
            raise ValueError(f"Image too small: {image.size}")
        results = {}
        for task in TASKS:
            key = TASK_KEY[task]
            try:
                results[key] = self._run_task(image, task)
            except Exception as e:
                logger.warning(f"Task {task} failed: {e}\n{traceback.format_exc()}")
                results[key] = ""
        return results

    def build_caption(self, task_results: dict, category: str) -> str:
        """Merge multi-task results into one Flux fine-tuning caption.

        Layout: "campus_ai_poster <label>. <visual> [Text on poster: ...]
        [Design elements: ...]" — empty sections are omitted.
        """
        # Full "parent/sub" label first, then the parent category, then generic.
        parent = category.split("/")[0] if "/" in category else category
        prefix = CATEGORY_LABELS.get(category, CATEGORY_LABELS.get(parent, "An event poster"))

        visual = task_results.get("visual", "").strip()
        ocr = task_results.get("ocr", "").strip()
        regions = task_results.get("regions", "").strip()

        parts = [f"campus_ai_poster {prefix}."]
        if visual:
            parts.append(visual)
        if ocr:
            # De-duplicate OCR fragments while preserving order.
            ocr_clean = " | ".join(dict.fromkeys(
                t.strip() for t in ocr.replace("\n", " | ").split(" | ") if t.strip()
            ))
            parts.append(f"[Text on poster: {ocr_clean}]")
        if regions:
            # Cap region text at ~400 chars, cutting at a sentence boundary.
            r = regions[:400].rsplit(".", 1)[0] + "." if len(regions) > 400 else regions
            parts.append(f"[Design elements: {r}]")

        return " ".join(parts)
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 249 |
+
# Checkpoint helpers
|
| 250 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 251 |
+
def load_checkpoint(path: Path) -> set:
    """Load the set of already-captioned image paths from *path*.

    A missing, unreadable, or corrupt checkpoint yields an empty set
    instead of aborting the whole run — mirroring the tolerant handling
    already used for metadata.json.
    """
    if not path.exists():
        return set()
    try:
        return set(json.loads(path.read_text()))
    except (json.JSONDecodeError, OSError) as e:
        logging.getLogger(__name__).warning(
            f"Could not read checkpoint {path}: {e}; starting fresh."
        )
        return set()
|
| 253 |
+
|
| 254 |
+
def save_checkpoint(path: Path, done: set):
    """Persist the processed-image set to *path* as a sorted JSON list."""
    serialized = json.dumps(sorted(done))
    path.write_text(serialized)
|
| 256 |
+
|
| 257 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 258 |
+
# Pipeline
|
| 259 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 260 |
+
# File suffixes treated as images (lowercase; compared via suffix.lower()).
IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
def run_captioning(config: dict, splits: list):
    """Caption every image in the configured dataset splits.

    For each split directory listed under config["paths"]["data"]:
      - resumes from a `.caption_checkpoint.json` of already-done paths,
      - walks the tree for images, derives the category from the sub-path,
      - runs Florence-2 multi-task captioning per image,
      - writes a sidecar `<image>.txt` caption and appends to metadata.json.

    Exits the process (sys.exit(1)) on missing config paths or a model
    load failure, since nothing useful can run without either.
    """
    _check_stale_cache()

    data_paths = config.get("paths", {}).get("data", {})
    if not data_paths:
        logger.error("Missing 'paths.data' in config.yaml")
        sys.exit(1)

    # Model load is expensive and can fail on version/cache mismatches —
    # surface the full traceback before bailing.
    try:
        captioner = Florence2Captioner()
    except Exception:
        logger.error(f"Could not load Florence-2:\n{traceback.format_exc()}")
        sys.exit(1)

    for split in splits:
        if split not in data_paths:
            logger.warning(f"'{split}' not in config paths. Skipping.")
            continue
        split_dir = Path(data_paths[split])
        if not split_dir.exists():
            logger.warning(f"Dir not found: {split_dir}. Skipping.")
            continue

        logger.info(f"\n{'='*60}")
        logger.info(f" Split: {split.upper()} ({split_dir})")
        logger.info(f"{'='*60}")

        # Resume support: checkpoint holds absolute/relative path strings
        # of images already captioned in a previous run.
        ckpt_path = split_dir / ".caption_checkpoint.json"
        done = load_checkpoint(ckpt_path)
        logger.info(f"Checkpoint: {len(done)} already captioned.")

        # Collect (category, image_path) pairs; the category is the
        # sub-directory path relative to the split root ("general" at root).
        all_imgs = []
        for root, _, files in os.walk(split_dir):
            rp = Path(root)
            for fname in sorted(files):
                fp = rp / fname
                if fp.suffix.lower() in IMAGE_EXTS:
                    cat = str(rp.relative_to(split_dir)).replace("\\", "/")
                    all_imgs.append((cat if cat != "." else "general", fp))

        logger.info(f"Total : {len(all_imgs)} | Remaining : {len(all_imgs) - len(done)}")
        remaining = [(c, p) for c, p in all_imgs if str(p) not in done]

        if not remaining:
            logger.info("Already complete.")
            continue

        # Existing metadata is extended, not replaced; a corrupt file is
        # tolerated (entries for already-done images are then lost).
        meta_path = split_dir / "metadata.json"
        metadata: list = []
        if meta_path.exists():
            try:
                metadata = json.loads(meta_path.read_text(encoding="utf-8"))
            except Exception:
                logger.warning("Could not read existing metadata; starting fresh.")

        failed = 0
        sample_logged = False  # log one example caption per split for sanity

        for cat, img_path in tqdm(remaining, desc=split):
            # Force-load (and RGB-convert) up front so decode errors are
            # caught here, not mid-inference.
            try:
                img = Image.open(img_path).convert("RGB")
                img.load()
            except Exception as e:
                logger.warning(f"Bad image [{img_path.name}]: {e}")
                failed += 1
                continue

            try:
                results = captioner.caption(img)
                caption = captioner.build_caption(results, cat)
                if not sample_logged:
                    logger.info(f"Sample caption:\n {caption[:300]}...")
                    sample_logged = True
            except Exception:
                logger.warning(f"Caption failed [{img_path.name}]:\n{traceback.format_exc()}")
                failed += 1
                continue

            # Sidecar caption file next to the image (same stem, .txt).
            img_path.with_suffix(".txt").write_text(caption, encoding="utf-8")
            metadata.append({
                "image": str(img_path),
                "caption_file": str(img_path.with_suffix(".txt")),
                "caption": caption,
                "visual": results.get("visual", ""),
                "ocr": results.get("ocr", ""),
                "regions": results.get("regions", ""),
                "category": cat,
                "width": img.size[0],
                "height": img.size[1],
                "timestamp": datetime.now().isoformat(),
            })
            done.add(str(img_path))
            # Periodic checkpoint so a crash loses at most 50 images.
            if len(done) % 50 == 0:
                save_checkpoint(ckpt_path, done)

        save_checkpoint(ckpt_path, done)
        meta_path.write_text(
            json.dumps(metadata, indent=2, ensure_ascii=False), encoding="utf-8"
        )
        logger.info(f"Done — captioned: {len(metadata)}, failed/skipped: {failed}")

        # Free GPU memory between splits.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    logger.info("All splits complete.")
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
def main():
    """CLI entry point: parse arguments and caption the selected dataset splits."""
    parser = argparse.ArgumentParser(description="Florence-2 Caption Generator")
    parser.add_argument("--config", default="configs/config.yaml")
    parser.add_argument("--splits", nargs="+", default=["train", "val", "test"])
    args = parser.parse_args()

    cfg = load_config(args.config)
    run_captioning(cfg, args.splits)


if __name__ == "__main__":
    main()
|
scripts/count_images.sh
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Count images in data/processed subdirectories and report progress
# toward the per-category TARGET.
# Usage: bash scripts/count_images.sh
#
# Fix vs previous revision: the counting loop ran in a pipe subshell, so
# the pass/fail/total counters were lost after the loop and a second full
# filesystem scan was needed to detect failing categories. The loop now
# reads from process substitution, so counters persist and the tree is
# scanned only once.

TARGET=1300
DATA_DIR="data/processed"

echo "=================================================="
echo " PROCESSED IMAGE COUNT REPORT (Target: $TARGET)"
echo "=================================================="
printf "%-40s %6s %10s\n" "CATEGORY" "COUNT" "STATUS"
echo "--------------------------------------------------------"

total_imgs=0
pass_count=0
fail_count=0   # categories with at least one image but below TARGET

# Iterate category/subcategory directories (exactly depth 2 under DATA_DIR).
# Process substitution (not a pipe) keeps variable updates in this shell.
while read -r dir; do
    # Count image files (case-insensitive extensions) directly inside dir
    count=$(find "$dir" -maxdepth 1 -type f | grep -iE "\.(jpg|jpeg|png|webp|bmp)$" | wc -l)

    # Relative path (category/subcategory) for display
    rel_path=${dir#$DATA_DIR/}

    if [ "$count" -ge "$TARGET" ]; then
        status="✅ PASS"
        pass_count=$((pass_count + 1))
    else
        status="❌ FAIL"
        # Only non-empty categories count as actionable failures.
        if [ "$count" -gt 0 ]; then
            fail_count=$((fail_count + 1))
        fi
    fi

    if [ "$count" -gt 0 ]; then
        printf "%-40s %6d %10s\n" "$rel_path" "$count" "$status"
        total_imgs=$((total_imgs + count))
    fi
done < <(find "$DATA_DIR" -mindepth 2 -maxdepth 2 -type d | sort)

echo "--------------------------------------------------------"
echo "TOTAL: $total_imgs images across all processed categories"
echo "=================================================="

# fail_count survived the loop, so no second scan is required.
if [ "$fail_count" -gt 0 ]; then
    echo "⚠️ $fail_count categories are below target ($TARGET)."
    echo " Run 'python scripts/targeted_filter_v2.py' to fix."
else
    echo "🎉 All categories meet the target goal!"
fi
|
scripts/count_splits.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
"""Print a per-category image-count table for the train/val/test splits."""
import os
from pathlib import Path

# Config
data_root = Path("data")
train_dir = data_root / "train"
val_dir = data_root / "val"
test_dir = data_root / "test"
IMG_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp", ".bmp"}


def count_images_in_dir(d: Path) -> int:
    """Return the number of image files directly inside *d* (0 if it is missing)."""
    if not d.exists():
        return 0
    return sum(1 for name in os.listdir(d) if Path(name).suffix.lower() in IMG_EXTENSIONS)


def _discover_categories(base: Path) -> set:
    """Collect relative paths of every directory under *base* that holds images."""
    found = set()
    for root, _dirs, files in os.walk(base):
        if any(Path(name).suffix.lower() in IMG_EXTENSIONS for name in files):
            found.add(str(Path(root).relative_to(base)).replace("\\", "/"))
    return found


# Prefer data/processed as the source of truth for category names;
# otherwise fall back to scanning the split folders themselves.
processed_dir = data_root / "processed"
if processed_dir.exists():
    categories = _discover_categories(processed_dir)
else:
    categories = set()
    for split_dir in (train_dir, val_dir, test_dir):
        if split_dir.exists():
            categories |= _discover_categories(split_dir)

print(f"{'Category':<40} | {'Train':<6} | {'Val':<5} | {'Test':<5} | {'Total':<6} | {'% Train':<8}")
print("-" * 100)

grand_totals = {"train": 0, "val": 0, "test": 0, "total": 0}

for cat in sorted(categories):
    counts = {
        "train": count_images_in_dir(train_dir / cat),
        "val": count_images_in_dir(val_dir / cat),
        "test": count_images_in_dir(test_dir / cat),
    }
    cat_total = sum(counts.values())

    for key, value in counts.items():
        grand_totals[key] += value
    grand_totals["total"] += cat_total

    share = (counts["train"] / cat_total * 100) if cat_total > 0 else 0.0

    print(f"{cat:<40} | {counts['train']:<6} | {counts['val']:<5} | {counts['test']:<5} | {cat_total:<6} | {share:.1f}%")

print("-" * 100)
t_train = grand_totals['train']
t_total = grand_totals['total']
t_pct = (t_train / t_total * 100) if t_total > 0 else 0
print(f"{'TOTAL':<40} | {t_train:<6} | {grand_totals['val']:<5} | {grand_totals['test']:<5} | {t_total:<6} | {t_pct:.1f}%")
|
scripts/create_training_config.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Create Training Config
|
| 4 |
+
Reads the master config.yaml and generates an ai-toolkit compatible
|
| 5 |
+
YAML training config at configs/train_sdxl_lora.yaml.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import sys
|
| 10 |
+
import argparse
|
| 11 |
+
import logging
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
import yaml
|
| 15 |
+
|
| 16 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def load_config(config_path: str = "configs/config.yaml") -> dict:
    """Read the master YAML config file and return its contents as a dict."""
    text = Path(config_path).read_text(encoding="utf-8")
    return yaml.safe_load(text)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def generate_ai_toolkit_config(config: dict, output_path: str):
    """
    Generate an ai-toolkit compatible SDXL LoRA training config from the
    master config and write it as YAML to *output_path*.

    Returns the generated config dict.
    """
    # --- Pull the relevant sections out of the master config --------------
    sdxl_cfg = config.get("models", {}).get("sdxl", {})
    training_cfg = config.get("training", {})
    sdxl_lora_cfg = training_cfg.get("sdxl_lora", {})
    lora_cfg = sdxl_lora_cfg.get("lora", {})
    optim_cfg = sdxl_lora_cfg.get("optimizer", {})
    sched_cfg = sdxl_lora_cfg.get("scheduler", {})
    snr_cfg = sdxl_lora_cfg.get("min_snr_gamma", {})
    paths_cfg = config.get("paths", {})

    # Base model
    base_model = sdxl_cfg.get("repo_id", "stabilityai/stable-diffusion-xl-base-1.0")

    # Paths
    data_dir = os.path.abspath(paths_cfg.get("data", {}).get("train", "data/train"))
    output_dir = os.path.abspath(
        paths_cfg.get("models", {}).get("sdxl", {}).get("checkpoints", "models/sdxl/checkpoints")
    )
    # NOTE: computed for parity with the master config layout; not referenced below.
    log_dir = os.path.abspath(
        paths_cfg.get("logs", {}).get("tensorboard", "logs/tensorboard")
    )

    # --- Scalar hyper-parameters ------------------------------------------
    rank = lora_cfg.get("rank", 32)
    alpha = lora_cfg.get("alpha", 16)
    dropout = lora_cfg.get("dropout", 0.05)

    batch_size = sdxl_lora_cfg.get("batch_size", 1)
    grad_accum = sdxl_lora_cfg.get("gradient_accumulation_steps", 4)
    lr = optim_cfg.get("learning_rate", 1e-4)
    epochs = sdxl_lora_cfg.get("epochs", 4)  # not consumed by the step-based schema below
    max_steps = sdxl_lora_cfg.get("max_steps", 12800)
    warmup_steps = sched_cfg.get("warmup_steps", 100)
    weight_decay = optim_cfg.get("weight_decay", 0.01)
    betas = optim_cfg.get("betas", [0.9, 0.999])

    height = sdxl_cfg.get("height", 1024)
    width = sdxl_cfg.get("width", 1024)
    seed = config.get("project", {}).get("seed", 42)
    dtype = training_cfg.get("mixed_precision", {}).get("dtype", "bf16")

    # A non-positive max_steps falls back to the default step budget.
    effective_steps = max_steps if max_steps > 0 else 12800

    # --- Assemble the ai-toolkit schema section by section ----------------
    network_section = {
        "type": "lora",
        "linear": rank,
        "linear_alpha": alpha,
        "dropout": dropout,
        "network_kwargs": {
            "lora_plus_lr_ratio": lora_cfg.get("lora_plus_ratio", 1.0),
        },
    }

    save_section = {
        "dtype": dtype,
        "save_every": sdxl_lora_cfg.get("checkpointing", {}).get("save_steps", 500),
        "max_step_saves_to_keep": sdxl_lora_cfg.get("checkpointing", {}).get("save_total_limit", 5),
    }

    datasets_section = [
        {
            "folder_path": data_dir,
            "caption_ext": "txt",
            "caption_dropout_rate": 0.1,
            "shuffle_tokens": True,
            "cache_latents_to_disk": True,
            "num_workers": 8,
            "resolution": [width, height],
        }
    ]

    train_section = {
        "batch_size": batch_size,
        "steps": effective_steps,
        "gradient_accumulation_steps": grad_accum,
        "train_unet": True,
        "train_text_encoder": False,
        "disable_sampling": True,
        "gradient_checkpointing": True,
        "noise_scheduler": "ddpm",
        "optimizer": optim_cfg.get("type", "adamw8bit"),
        "lr": lr,
        "lr_warmup_steps": warmup_steps,
        # None disables min-SNR weighting when the master config turns it off.
        "min_snr_gamma": snr_cfg.get("gamma", 5.0) if snr_cfg.get("enabled", True) else None,
        "optimizer_params": {
            "weight_decay": weight_decay,
            "betas": betas,
        },
        "ema_config": {
            "use_ema": True,
            "ema_decay": 0.999,
        },
        "dtype": dtype,
        "lr_scheduler": sched_cfg.get("type", "cosine_with_restarts"),
        "lr_scheduler_params": {
            "T_0": max(1, effective_steps // sched_cfg.get("num_cycles", 3)),
            "T_mult": 1,
            "eta_min": lr / 10,
        },
    }

    sample_section = {
        "sampler": "euler_a",
        "sample_every": 999999,
        "width": width,
        "height": height,
        "prompts": [
            "campus_ai_poster a vibrant technology fest poster with neon colors and bold typography",
            "campus_ai_poster a colorful Diwali celebration poster with golden diyas and rangoli",
            "campus_ai_poster a professional workshop seminar poster with modern minimalist design",
            "campus_ai_poster a dynamic sports tournament poster with action silhouettes",
        ],
        "neg": "",
        "seed": seed,
        "walk_seed": True,
        "guidance_scale": 5,
        "sample_steps": 28,
    }

    logging_section = {
        "log_every": sdxl_lora_cfg.get("logging", {}).get("steps", 10),
        "use_wandb": config.get("monitoring", {}).get("wandb", {}).get("enabled", False),
        "verbose": True,
    }

    aitk_config = {
        "job": "extension",
        "config": {
            "name": "campus_ai_poster_sdxl",
            "process": [
                {
                    "type": "sd_trainer",
                    "training_folder": output_dir,
                    "device": "cuda:0",
                    "trigger_word": "campus_ai_poster",
                    "network": network_section,
                    "save": save_section,
                    "datasets": datasets_section,
                    "train": train_section,
                    "model": {
                        "name_or_path": base_model,
                        "is_xl": True,
                    },
                    "sample": sample_section,
                    "logging": logging_section,
                }
            ],
            "meta": {
                "name": "campus_ai_v1",
                "version": "1.0",
            },
        },
    }

    # --- Write YAML output and log a summary ------------------------------
    output_file = Path(output_path)
    output_file.parent.mkdir(parents=True, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        yaml.dump(aitk_config, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

    logger.info(f"ai-toolkit training config written to: {output_file}")
    logger.info(f" Base model: {base_model}")
    logger.info(f" Dataset dir: {data_dir}")
    logger.info(f" Output dir: {output_dir}")
    logger.info(f" LoRA rank: {rank}, alpha: {alpha}")
    logger.info(f" Batch size: {batch_size}, Grad accum: {grad_accum}")
    logger.info(f" Learning rate: {lr}")
    logger.info(f" Resolution: {width}x{height}")
    logger.info(f" Mixed precision: {dtype}")

    return aitk_config
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def main():
    """Load the master config and emit the ai-toolkit training YAML."""
    ap = argparse.ArgumentParser(description="Generate ai-toolkit Training Config")
    ap.add_argument("--config", default="configs/config.yaml", help="Path to master config.yaml")
    ap.add_argument("--output", default="configs/train_sdxl_lora.yaml", help="Output path for ai-toolkit config")
    opts = ap.parse_args()

    generate_ai_toolkit_config(load_config(opts.config), opts.output)


if __name__ == "__main__":
    main()
|
scripts/deploy_to_hf.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
from huggingface_hub import HfApi, login
|
| 4 |
+
|
| 5 |
+
def deploy_model():
    """
    Interactive deployment helper for the Campus AI LoRA.

    Prompts for a Hugging Face write token and repo id, verifies the Phase 3
    checkpoint exists locally, creates the repo if needed, and uploads the
    weights. Exits with status 1 on any failure so callers/CI can detect it
    (previously upload errors were printed but the script still exited 0).
    """
    print("="*60)
    print(" CAMPUS AI - HUGGING FACE DEPLOYMENT")
    print("="*60)

    # 1. Ask for credentials and repo ID
    hf_token = input("\nEnter your Hugging Face WRITE Token (paste and press Enter): ").strip()
    repo_id = input("Enter your Hugging Face Repository ID (e.g. your_username/campus-ai-poster-sdxl): ").strip()

    if not hf_token or not repo_id:
        print("\n[!] Error: Token and Repository ID are required.")
        sys.exit(1)

    try:
        # 2. Authenticate
        print("\n[+] Authenticating with Hugging Face...")
        login(token=hf_token)
        api = HfApi()

        # 3. Verify Phase 3 Model exists
        model_dir = "models/sdxl/checkpoints/campus_ai_poster_sdxl_phase3"
        model_file = os.path.join(model_dir, "campus_ai_poster_sdxl_phase3.safetensors")

        if not os.path.exists(model_file):
            # SystemExit is a BaseException, so the `except Exception` below
            # does not swallow it.
            print(f"\n[!] Error: Phase 3 model not found at {model_file}!")
            print("Make sure Phase 3 training has finished successfully.")
            sys.exit(1)

        print("\n[+] Creating/Verifying repository...")
        api.create_repo(repo_id=repo_id, exist_ok=True, private=False)

        # 4. Upload the model
        print(f"\n[+] Uploading Phase 3 Model to {repo_id}...")
        api.upload_file(
            path_or_fileobj=model_file,
            path_in_repo="campus_ai_poster_sdxl_phase3.safetensors",
            repo_id=repo_id,
            repo_type="model",
            commit_message="Upload final Campus AI Phase 3 LoRA weights"
        )

        print("\n" + "="*60)
        print(f" ✅ DEPLOYMENT SUCCESSFUL!")
        print(f" Model is now live at: https://huggingface.co/{repo_id}")
        print("="*60)
        print("You can now connect this model directly to your Hugging Face space.")

    except Exception as e:
        # Bug fix: propagate a non-zero exit code on failure instead of
        # returning success after printing the error.
        print(f"\n[!] Deployment Failed: {str(e)}")
        sys.exit(1)

if __name__ == "__main__":
    deploy_model()
|
scripts/image_deduplicator.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sqlite3
|
| 3 |
+
import imagehash
|
| 4 |
+
from PIL import Image
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
import logging
|
| 8 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 9 |
+
import time
|
| 10 |
+
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
class GlobalImageDeduplicator:
    """
    Globally tracks perceptual hashes of all images in the data directory
    to prevent downloading duplicates across all subfolders and phases.
    Uses an SQLite database for persistent caching to speed up initialization.
    """

    # Recognized image extensions; matching is done case-insensitively.
    # (The previous lowercase-only rglob patterns silently missed .JPG/.PNG
    # files on case-sensitive filesystems.)
    IMG_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"}

    def __init__(self, data_dir: str, db_path: str = None, hash_size: int = 8, threshold: int = 5):
        """
        Args:
            data_dir: Root directory scanned recursively for images.
            db_path: SQLite cache location; defaults to <data_dir>/phash_cache.db.
            hash_size: Side length of the perceptual-hash grid.
            threshold: Max Hamming distance for two hashes to count as duplicates.
        """
        self.data_dir = Path(data_dir)
        if db_path is None:
            # Store at root/data/phash_cache.db
            self.db_path = self.data_dir / "phash_cache.db"
        else:
            self.db_path = Path(db_path)

        self.hash_size = hash_size
        self.threshold = threshold
        self.hashes = []  # List of (filepath, imagehash.ImageHash)

        logger.info(f"Initializing Global Image Deduplicator using DB: {self.db_path}")
        # check_same_thread=False: the connection may be touched from worker threads.
        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        self._init_db()
        self._load_and_sync()

    def _init_db(self):
        """Create the phash table if it does not exist yet."""
        with self.conn:
            self.conn.execute('''
                CREATE TABLE IF NOT EXISTS phashes (
                    filepath TEXT PRIMARY KEY,
                    mtime REAL,
                    hash_str TEXT
                )
            ''')

    def _load_and_sync(self):
        """
        Reconcile the on-disk cache with the filesystem: drop records for
        deleted files, (re)hash new or modified images, then load every
        hash into memory for fast duplicate comparison.
        """
        logger.info(f"Scanning {self.data_dir} for images...")
        # Case-insensitive extension match (fixes missed uppercase extensions).
        all_files = [
            p for p in self.data_dir.rglob("*")
            if p.is_file() and p.suffix.lower() in self.IMG_EXTENSIONS
        ]

        # Get existing records from the DB
        cursor = self.conn.cursor()
        cursor.execute("SELECT filepath, mtime, hash_str FROM phashes")
        db_records = {row[0]: (row[1], row[2]) for row in cursor.fetchall()}

        to_hash = []
        to_delete = []

        current_files = set(str(f) for f in all_files)

        # Determine what needs (re)hashing
        for f in all_files:
            f_str = str(f)
            mtime = os.path.getmtime(f)
            if f_str in db_records:
                # If modified time changed, rehash
                if db_records[f_str][0] < mtime:
                    to_hash.append((f_str, f, mtime))
            else:
                to_hash.append((f_str, f, mtime))

        for db_file in db_records:
            if db_file not in current_files:
                to_delete.append(db_file)

        # Delete records for files that no longer exist
        if to_delete:
            logger.info(f"Removing {len(to_delete)} deleted files from cache.")
            with self.conn:
                self.conn.executemany("DELETE FROM phashes WHERE filepath = ?", [(f,) for f in to_delete])

        # Hash new or modified files in parallel (PIL releases the GIL for I/O)
        if to_hash:
            logger.info(f"Hashing {len(to_hash)} new/modified images. This might take a while...")

            def compute_hash(args):
                f_str, f, mtime = args
                try:
                    with Image.open(f) as img:
                        # Convert to RGB to be safe and avoid issues with alpha channels
                        conv_img = img.convert("RGB")
                        h = imagehash.phash(conv_img, hash_size=self.hash_size)
                    return f_str, mtime, str(h)
                except Exception as e:
                    logger.debug(f"Error hashing {f}: {e}")
                    return None

            results = []
            with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
                for res in tqdm(executor.map(compute_hash, to_hash), total=len(to_hash), desc="Hashing"):
                    if res is not None:
                        results.append(res)

            # Persist new hashes
            with self.conn:
                self.conn.executemany("INSERT OR REPLACE INTO phashes (filepath, mtime, hash_str) VALUES (?, ?, ?)", results)

        # Load all hashes into memory for fast comparison
        cursor.execute("SELECT filepath, hash_str FROM phashes")
        for filepath, hash_str in cursor.fetchall():
            self.hashes.append((filepath, imagehash.hex_to_hash(hash_str)))

        logger.info(f"Loaded {len(self.hashes)} image hashes for deduplication.")

    def is_duplicate(self, img: Image.Image, save_path: str = None) -> bool:
        """
        Check if an image is a duplicate of any globally known image.

        Linear scan over all known hashes (O(n) per call). If save_path is
        provided and the image is NOT a duplicate, its hash is added to the
        in-memory cache immediately so the same duplicate is not downloaded
        twice within one session.
        """
        # Ensure RGB so phash sees a consistent mode
        if img.mode != 'RGB':
            img = img.convert('RGB')

        h = imagehash.phash(img, hash_size=self.hash_size)

        for existing_path, existing_hash in self.hashes:
            if abs(h - existing_hash) <= self.threshold:
                return True

        if save_path:
            self.hashes.append((str(save_path), h))

        return False

    def add_to_disk_cache(self, filepath: str, img: Image.Image):
        """
        Manually add an image to the DB cache. Use this after saving an image
        to disk so next time we run, it's already in the DB.
        """
        if img.mode != 'RGB':
            img = img.convert('RGB')
        h = imagehash.phash(img, hash_size=self.hash_size)
        # Wait slightly to ensure mtime is written
        time.sleep(0.01)
        mtime = os.path.getmtime(filepath)
        with self.conn:
            self.conn.execute("INSERT OR REPLACE INTO phashes (filepath, mtime, hash_str) VALUES (?, ?, ?)",
                              (str(filepath), mtime, str(h)))

    def close(self):
        """Close the underlying SQLite connection (previously never closed).

        Safe to call more than once.
        """
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False
|
scripts/monitor_downloads.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""Monitor download progress across all subcategories."""
import os
import time
from pathlib import Path

RAW_DIR = Path("data/raw")
TARGET_PER_SUBFOLDER = 1900
REFRESH_SECONDS = 30  # keep the footer message in sync with the sleep below

def count_images():
    """Count images in each subfolder and print a progress report.

    Fixes vs previous revision:
    - no longer crashes with FileNotFoundError when data/raw does not exist yet;
    - the footer now reports the actual refresh interval (the message said
      "every 1s" while the loop slept 30s).
    """
    os.system("cls" if os.name == "nt" else "clear")

    total_images = 0
    total_target = 0
    rows = []

    # data/raw may not exist before the first scrape; report an empty table.
    if RAW_DIR.exists():
        for parent in sorted(RAW_DIR.iterdir()):
            if not parent.is_dir():
                continue
            for sub in sorted(parent.iterdir()):
                if not sub.is_dir():
                    continue
                count = sum(
                    1 for f in sub.iterdir()
                    if f.suffix.lower() in {".jpg", ".jpeg", ".png", ".webp", ".bmp"}
                )
                remaining = max(0, TARGET_PER_SUBFOLDER - count)
                pct = min(100, count / TARGET_PER_SUBFOLDER * 100)
                bar = "█" * int(pct // 5) + "░" * (20 - int(pct // 5))
                status = "✅" if count >= TARGET_PER_SUBFOLDER else "⏳"

                category = f"{parent.name}/{sub.name}"
                rows.append((category, count, remaining, pct, bar, status))
                total_images += count
                total_target += TARGET_PER_SUBFOLDER

    # Print header
    total_remaining = max(0, total_target - total_images)
    total_pct = total_images / total_target * 100 if total_target > 0 else 0
    print(f"{'='*80}")
    print(f" 📊 DOWNLOAD MONITOR | {total_images:,} / {total_target:,} images "
          f"({total_pct:.1f}%) | {total_remaining:,} remaining")
    print(f"{'='*80}")
    print(f" {'Category':<35} {'Count':>6} {'Left':>6} {'Progress':<24} ")
    print(f" {'-'*35} {'-'*6} {'-'*6} {'-'*24}")

    for category, count, remaining, pct, bar, status in rows:
        print(f" {category:<35} {count:>6} {remaining:>6} {bar} {pct:5.1f}% {status}")

    print(f" {'-'*35} {'-'*6} {'-'*6} {'-'*24}")
    total_bar = "█" * int(total_pct // 5) + "░" * (20 - int(total_pct // 5))
    print(f" {'TOTAL':<35} {total_images:>6} {total_remaining:>6} {total_bar} {total_pct:5.1f}%")
    print(f"\n Last updated: {time.strftime('%H:%M:%S')} | Refreshing every {REFRESH_SECONDS}s | Ctrl+C to stop")

if __name__ == "__main__":
    while True:
        try:
            count_images()
            time.sleep(REFRESH_SECONDS)
        except KeyboardInterrupt:
            print("\n\n Monitoring stopped.")
            break
|
scripts/pinterest_scraper.py
ADDED
|
@@ -0,0 +1,862 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Pinterest Poster Image Scraper
|
| 4 |
+
Config-driven scraper using Selenium + BeautifulSoup.
|
| 5 |
+
Reads queries from config.yaml, downloads poster images to data/raw/{category}/
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import sys
|
| 10 |
+
import time
|
| 11 |
+
import hashlib
|
| 12 |
+
import logging
|
| 13 |
+
import argparse
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
from io import BytesIO
|
| 16 |
+
from urllib.parse import urljoin
|
| 17 |
+
import yaml
|
| 18 |
+
import requests
|
| 19 |
+
import imagehash
|
| 20 |
+
from PIL import Image
|
| 21 |
+
from tqdm import tqdm
|
| 22 |
+
|
| 23 |
+
from image_deduplicator import GlobalImageDeduplicator
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
from selenium import webdriver
|
| 27 |
+
from selenium.webdriver.chrome.service import Service
|
| 28 |
+
from selenium.webdriver.chrome.options import Options
|
| 29 |
+
from selenium.webdriver.common.by import By
|
| 30 |
+
from selenium.webdriver.common.keys import Keys
|
| 31 |
+
from selenium.webdriver.support.ui import WebDriverWait
|
| 32 |
+
from selenium.webdriver.support import expected_conditions as EC
|
| 33 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
| 34 |
+
HAS_SELENIUM = True
|
| 35 |
+
except ImportError:
|
| 36 |
+
HAS_SELENIUM = False
|
| 37 |
+
print("WARNING: selenium/webdriver_manager not installed. Install with:")
|
| 38 |
+
print(" pip install selenium webdriver-manager")
|
| 39 |
+
|
| 40 |
+
try:
|
| 41 |
+
from bs4 import BeautifulSoup
|
| 42 |
+
HAS_BS4 = True
|
| 43 |
+
except ImportError:
|
| 44 |
+
HAS_BS4 = False
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 48 |
+
# Logging
|
| 49 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 50 |
+
# Log INFO and above to the console with timestamps; scripts import nothing
# from here, so a module-level logger keyed on __name__ is sufficient.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 59 |
+
# Config
|
| 60 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 61 |
+
def load_config(config_path: str = "configs/config.yaml") -> dict:
    """Load the master configuration file.

    Args:
        config_path: Path to the YAML config file
            (default: ``configs/config.yaml``).

    Returns:
        The parsed configuration as a dict.
    """
    with open(config_path, "r", encoding="utf-8") as handle:
        parsed = yaml.safe_load(handle)
    return parsed
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 68 |
+
# Default search queries (per category) – can be overridden in config
|
| 69 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 70 |
+
# Mapping of dataset category ("parent/subcategory" path used under
# data/raw/) -> list of Pinterest search queries scraped for that category.
# These are fallback defaults; the master config.yaml can override them.
DEFAULT_QUERIES = {
    # ══════════════════════════════════════════════════════════════
    # TECH FEST (parent: tech_fest/)
    # ══════════════════════════════════════════════════════════════
    "tech_fest/hackathon": [
        "hackathon poster design",
        "24 hour hackathon event poster",
        "code sprint competition poster",
        "startup hackathon poster design",
        "programming hackathon poster",
        "hackathon flyer template",
        "university hackathon invite",
        "tech hackathon banner",
        "coding marathon event poster",
        "hackathon winner announcement",
        "virtual hackathon poster",
        "hackathon timeline graphic",
        "innovate hackathon poster",
        "hackathon ideas poster",
        "hackathon challenge flyer"
    ],
    "tech_fest/coding_competition": [
        "coding competition poster design",
        "competitive programming poster",
        "code challenge event poster",
        "algorithm contest poster",
        "debug code competition poster",
        "bug bounty event poster",
        "coding battle flyer",
        "programming contest flyer",
        "code war poster design",
        "coding tournament bracket"
    ],
    "tech_fest/ai_ml": [
        "artificial intelligence conference poster",
        "machine learning workshop poster",
        "deep learning summit poster",
        "data science event poster",
        "AI summit poster design",
    ],
    "tech_fest/robotics": [
        "robotics event poster design",
        "robot competition poster",
        "robotics workshop poster",
        "drone racing event poster",
    ],
    "tech_fest/cybersecurity": [
        "cyber security event poster",
        "ethical hacking workshop poster",
        "CTF competition poster",
        "cybersecurity conference poster",
    ],
    "tech_fest/web_app_dev": [
        "web development bootcamp poster",
        "app development workshop poster",
        "full stack developer event poster",
        "software engineering meetup poster",
    ],
    "tech_fest/general": [
        "tech fest poster design",
        "technology conference poster",
        "tech expo poster design",
        "tech summit poster design",
        "innovation challenge poster",
        "tech symposium poster",
        "engineering college fest poster",
        "tech week event flyer",
        "future tech event poster",
        "technology showcase poster",
        "IT fest poster design"
    ],

    # ══════════════════════════════════════════════════════════════
    # CULTURAL FEST (parent: cultural_fest/)
    # ══════════════════════════════════════════════════════════════
    "cultural_fest/dance": [
        "dance competition poster design",
        "classical dance event poster",
        "hip hop dance poster",
        "bollywood dance night poster",
        "dance festival poster design",
    ],
    "cultural_fest/music": [
        "music concert poster design",
        "live music event poster",
        "DJ night poster design",
        "band performance poster",
        "acoustic night event poster",
        "indie music festival poster",
    ],
    "cultural_fest/drama_theatre": [
        "theatre play poster design",
        "drama festival poster",
        "street play nukkad natak poster",
        "stage performance poster",
    ],
    "cultural_fest/art_exhibition": [
        "art exhibition poster design",
        "painting exhibition poster",
        "modern art show poster",
        "sculpture exhibition poster",
        "photography exhibition poster",
    ],
    "cultural_fest/fashion_show": [
        "fashion show poster design",
        "college fashion event poster",
        "runway show poster design",
        "fashion week poster design",
    ],
    "cultural_fest/literary": [
        "literary festival poster",
        "poetry slam event poster",
        "book launch poster design",
        "debate competition poster",
        "storytelling event poster",
        "quiz competition poster",
    ],
    "cultural_fest/standup_comedy": [
        "standup comedy show poster",
        "open mic night poster",
        "comedy night poster design",
        "improv comedy poster",
    ],
    "cultural_fest/general": [
        "cultural fest poster design",
        "college cultural event poster",
        "cultural night poster India",
        "talent show poster design",
    ],

    # ══════════════════════════════════════════════════════════════
    # SPORTS (parent: sports/)
    # ══════════════════════════════════════════════════════════════
    "sports/cricket": [
        "cricket tournament poster",
        "IPL fan event poster",
        "cricket match poster design",
        "T20 cricket championship poster",
    ],
    "sports/football": [
        "football tournament poster design",
        "soccer championship poster",
        "inter-college football poster",
        "futsal tournament poster",
    ],
    "sports/basketball": [
        "basketball tournament poster design",
        "3x3 basketball event poster",
        "college basketball championship poster",
    ],
    "sports/badminton_tennis": [
        "badminton tournament poster",
        "tennis championship poster",
        "table tennis tournament poster",
        "squash competition poster",
    ],
    "sports/athletics": [
        "athletics meet poster design",
        "track and field event poster",
        "marathon poster design",
        "fun run event poster",
    ],
    "sports/esports": [
        "esports tournament poster",
        "gaming event poster design",
        "BGMI tournament poster",
        "valorant tournament poster",
        "FIFA tournament poster",
    ],
    "sports/kabaddi_kho": [
        "kabaddi tournament poster India",
        "kho kho competition poster",
        "traditional Indian sports poster",
    ],
    "sports/yoga_fitness": [
        "yoga day event poster",
        "fitness challenge poster",
        "gym event poster design",
        "wellness camp poster",
        "cycling event poster",
    ],
    "sports/general": [
        "sports tournament poster design",
        "college sports day poster",
        "inter-college sports poster",
        "sports carnival poster",
        "annual sports meet poster",
        "sports championship flyer",
        "athletic meet event poster",
        "intramural sports poster",
        "sports league banner",
        "team sports event poster"
    ],

    # ══════════════════════════════════════════════════════════════
    # COLLEGE EVENTS (parent: college_events/)
    # ══════════════════════════════════════════════════════════════
    "college_events/annual_fest": [
        "college fest poster India",
        "university festival poster",
        "college annual day poster",
        "campus fest poster design",
    ],
    "college_events/freshers": [
        "freshers party poster design",
        "freshers welcome poster India",
        "welcome party poster design",
        "fresher orientation poster",
    ],
    "college_events/farewell": [
        "farewell party poster college",
        "goodbye seniors poster design",
        "senior farewell poster",
        "farewell ceremony poster",
    ],
    "college_events/graduation": [
        "graduation ceremony poster",
        "convocation poster design",
        "degree ceremony poster",
        "graduation day poster",
    ],
    "college_events/clubs_recruitment": [
        "student club poster design",
        "college society recruitment poster",
        "club recruitment drive poster",
        "join our club poster design",
    ],
    "college_events/alumni_reunion": [
        "alumni meet poster design",
        "class reunion poster",
        "homecoming event poster",
        "alumni networking event poster",
    ],

    # ══════════════════════════════════════════════════════════════
    # FESTIVALS (parent: festivals/)
    # ══════════════════════════════════════════════════════════════
    "festivals/diwali": [
        "Diwali celebration poster",
        "Diwali event poster design",
        "Diwali festival poster",
        "Deepavali poster design",
        "Diwali mela poster",
        "Diwali night event poster",
    ],
    "festivals/holi": [
        "Holi festival poster design",
        "Holi event poster colorful",
        "Holi party poster design",
        "Holi DJ night poster",
        "Holi splash event poster",
    ],
    "festivals/navratri_garba": [
        "Navratri celebration poster",
        "Navratri garba night poster",
        "dandiya event poster",
        "Navratri festival poster design",
        "garba night pass design",
        "dandiya raas invitation",
        "navratri dandiya night flyer",
        "gujarati garba night poster",
        "navratri utsav poster",
        "dandiya night ticket design"
    ],
    "festivals/durga_puja": [
        "Durga puja poster design",
        "Durga puja pandal poster",
        "Durga puja celebration poster",
    ],
    "festivals/ganesh_chaturthi": [
        "Ganesh Chaturthi poster design",
        "Ganpati festival poster",
        "Ganesh utsav poster",
        "eco friendly Ganpati poster",
    ],
    "festivals/eid": [
        "Eid celebration poster design",
        "Eid mubarak event poster",
        "Eid ul fitr poster",
        "Ramadan event poster",
        "iftar party poster",
    ],
    "festivals/christmas": [
        "Christmas party poster design",
        "Christmas celebration event poster",
        "Christmas carnival poster",
    ],
    "festivals/new_year": [
        "new year celebration poster",
        "new year eve party poster",
        "new year countdown poster",
    ],
    "festivals/onam": [
        "Onam festival poster design",
        "Onam celebration poster",
        "Kerala Onam poster",
    ],
    "festivals/pongal_sankranti": [
        "Pongal celebration poster",
        "Makar Sankranti poster design",
        "Lohri celebration poster",
        "harvest festival poster India",
    ],
    "festivals/independence_republic": [
        "independence day poster India",
        "republic day poster design",
        "15 August celebration poster",
        "26 January event poster",
        "patriotic event poster India",
    ],

    # ══════════════════════════════════════════════════════════════
    # WORKSHOPS & ACADEMIC (parent: workshops/)
    # ══════════════════════════════════════════════════════════════
    "workshops/coding": [
        "coding workshop poster",
        "python workshop poster",
        "programming workshop poster design",
        "hackathon coding workshop poster",
        "web dev bootcamp poster",
        "learn to code event poster",
        "java programming workshop poster",
        "c++ workshop poster design",
        "react js workshop poster",
        "machine learning workshop poster design",
        "app development workshop poster",
        "coding bootcamp flyer design",
        "programming contest poster",
        "software engineering workshop poster",
        "game development workshop poster",
        "data structures workshop poster",
        "coding marathon poster design",
        "algorithm workshop poster",
        "backend development workshop poster",
        "frontend workshop poster design"
    ],
    "workshops/design": [
        "graphic design workshop poster",
        "UI UX design workshop poster",
        "video editing workshop poster",
        "photography workshop poster",
        "logo design workshop poster",
        "poster design workshop flyer",
        "typography workshop poster",
        "adobe photoshop workshop poster",
        "adobe illustrator workshop poster",
        "digital art workshop poster",
        "creative design workshop poster",
        "branding workshop poster design",
        "product design workshop poster",
        "animation workshop poster design",
        "3d design workshop poster",
        "figma workshop poster",
        "canva design workshop poster",
        "sketching workshop poster design",
        "motion graphics workshop poster",
        "visual design workshop poster"
    ],
    "workshops/business": [
        "entrepreneurship seminar poster",
        "startup workshop poster",
        "business plan competition poster",
        "marketing workshop poster",
        "business strategy workshop flyer",
        "startup weekend poster",
        "business model canvas workshop",
        "digital marketing seminar poster",
        "finance workshop poster",
        "MBA event poster design"
    ],
    "workshops/soft_skills": [
        "public speaking workshop poster",
        "leadership workshop poster",
        "communication skills seminar poster",
        "resume building workshop poster",
    ],
    "workshops/seminar": [
        "seminar poster template professional",
        "webinar event poster",
        "guest lecture poster design",
        "research paper workshop poster",
    ],
    "workshops/conference": [
        "academic conference poster",
        "research symposium poster",
        "TEDx event poster design",
        "panel discussion poster",
        "keynote speaker event poster",
    ],
    "workshops/placement": [
        "placement drive poster design",
        "career fair poster",
        "campus hiring poster design",
        "internship drive poster",
        "job recruitment poster",
    ],

    # ══════════════════════════════════════════════════════════════
    # SOCIAL & AWARENESS (parent: social/)
    # ══════════════════════════════════════════════════════════════
    "social/blood_donation": [
        "blood donation camp poster",
        "blood donation drive poster",
        "donate blood save life poster",
    ],
    "social/environment": [
        "environment day poster design",
        "tree planting event poster",
        "cleanliness drive poster",
        "earth day poster design",
    ],
    "social/charity": [
        "charity event poster design",
        "fundraiser poster",
        "NGO event poster",
        "donation drive poster design",
    ],
    "social/awareness": [
        "health awareness camp poster",
        "women empowerment event poster",
        "mental health awareness poster",
        "road safety awareness poster",
    ],

    # ══════════════════════════════════════════════════════════════
    # FOOD & ENTERTAINMENT (parent: entertainment/)
    # ══════════════════════════════════════════════════════════════
    "entertainment/food_fest": [
        "food festival poster design",
        "food carnival poster",
        "street food event poster",
        "bake sale poster design",
        "cooking competition poster",
    ],
    "entertainment/movie_night": [
        "movie night event poster",
        "film screening poster design",
        "cinema night poster",
        "short film festival poster",
    ],
    "entertainment/gaming": [
        "gaming night poster design",
        "LAN party poster",
        "board game event poster",
        "game jam poster design",
    ],

    # ══════════════════════════════════════════════════════════════
    # DESIGN STYLES (parent: styles/)
    # ══════════════════════════════════════════════════════════════
    "styles/minimalist": [
        "minimalist event poster design",
        "clean modern poster layout",
        "simple elegant poster design",
        "white space poster design",
    ],
    "styles/neon_glow": [
        "neon glow party poster design",
        "glowing neon event poster",
        "cyberpunk poster design",
        "neon lights party poster",
    ],
    "styles/retro_vintage": [
        "retro vintage poster design",
        "80s style event poster",
        "vintage college event poster",
        "retro music poster design",
    ],
    "styles/3d_futuristic": [
        "3D event poster design",
        "futuristic poster design",
        "sci-fi event poster",
        "holographic poster design",
    ],
    "styles/watercolor": [
        "watercolor event poster design",
        "hand painted poster design",
        "artistic poster illustration",
        "brush stroke poster design",
    ],
    "styles/gradient": [
        "gradient poster design modern",
        "colorful gradient event poster",
        "vibrant gradient poster",
        "modern abstract poster design",
    ],
    "styles/dark_theme": [
        "dark theme poster design",
        "black background event poster",
        "dark mode poster design",
        "dark elegant poster",
    ],
    "styles/typography": [
        "typography poster design",
        "bold text poster design",
        "kinetic typography poster",
        "lettering poster design",
    ],
    "styles/illustration": [
        "illustrated event poster",
        "cartoon style poster design",
        "hand drawn poster design",
        "vector illustration poster",
    ],

    # ══════════════════════════════════════════════════════════════
    # GENERAL (catch-all)
    # ══════════════════════════════════════════════════════════════
    "general": [
        "event poster design modern",
        "professional poster layout",
        "modern event flyer design",
        "creative poster design 2024",
        "minimalist event poster",
    ],
}
|
| 586 |
+
|
| 587 |
+
|
| 588 |
+
|
| 589 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 590 |
+
# Perceptual Hash Dedup (Moved to image_deduplicator.py)
|
| 591 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 592 |
+
|
| 593 |
+
|
| 594 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 595 |
+
# Pinterest Scraper
|
| 596 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 597 |
+
class PinterestScraper:
    """Scrape poster images from Pinterest using Selenium."""

    # Search endpoint; {query} is substituted with a '+'-joined query string.
    PINTEREST_SEARCH_URL = "https://www.pinterest.com/search/pins/?q={query}"
    # Per-category raw-download target, deliberately oversized relative to the
    # final dataset size since downstream quality filtering discards images.
    TARGET_PER_THEME = 1900  # Download extra to ensure 1300+ survive quality filtering
|
| 602 |
+
|
| 603 |
+
def __init__(self, config: dict, output_dir: str):
|
| 604 |
+
self.config = config
|
| 605 |
+
self.output_dir = Path(output_dir)
|
| 606 |
+
self.output_dir.mkdir(parents=True, exist_ok=True)
|
| 607 |
+
|
| 608 |
+
scraping_cfg = config.get("scraping", {}).get("pinterest", {})
|
| 609 |
+
self.scroll_pause = scraping_cfg.get("scroll_pause_seconds", 2.0)
|
| 610 |
+
self.download_timeout = scraping_cfg.get("download_timeout", 15)
|
| 611 |
+
self.min_resolution = scraping_cfg.get("min_resolution", 512)
|
| 612 |
+
|
| 613 |
+
data_root = self.config.get("paths", {}).get("data", {}).get("root", "data")
|
| 614 |
+
self.dedup = GlobalImageDeduplicator(data_dir=data_root)
|
| 615 |
+
self.session = requests.Session()
|
| 616 |
+
self.session.headers.update({
|
| 617 |
+
"User-Agent": (
|
| 618 |
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
| 619 |
+
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
| 620 |
+
"Chrome/120.0.0.0 Safari/537.36"
|
| 621 |
+
)
|
| 622 |
+
})
|
| 623 |
+
|
| 624 |
+
def _create_driver(self) -> "webdriver.Chrome":
|
| 625 |
+
"""Create a headless Chrome driver."""
|
| 626 |
+
opts = Options()
|
| 627 |
+
opts.add_argument("--headless=new")
|
| 628 |
+
opts.add_argument("--no-sandbox")
|
| 629 |
+
opts.add_argument("--disable-dev-shm-usage")
|
| 630 |
+
opts.add_argument("--disable-gpu")
|
| 631 |
+
opts.add_argument("--window-size=1920,1080")
|
| 632 |
+
opts.add_argument(
|
| 633 |
+
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
| 634 |
+
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
| 635 |
+
"Chrome/120.0.0.0 Safari/537.36"
|
| 636 |
+
)
|
| 637 |
+
service = Service(ChromeDriverManager().install())
|
| 638 |
+
return webdriver.Chrome(service=service, options=opts)
|
| 639 |
+
|
| 640 |
+
    def _scroll_and_collect_urls(self, driver, query: str, max_images: int) -> list[str]:
        """Scroll Pinterest search page and collect image URLs.

        Repeatedly scrolls to the bottom of the search results, harvesting
        pinimg.com image URLs from the rendered DOM after each scroll, until
        either `max_images` URLs are collected or the page height stops
        growing for 8 consecutive scrolls (end of results / rate limiting).

        Args:
            driver: Live Selenium WebDriver already able to load pages.
            query: Search phrase; spaces are '+'-encoded into the URL.
            max_images: Upper bound on the number of URLs returned.

        Returns:
            Up to `max_images` candidate image URLs. Order is arbitrary
            because URLs are accumulated in a set.
        """
        url = self.PINTEREST_SEARCH_URL.format(query=query.replace(" ", "+"))
        driver.get(url)
        time.sleep(3)  # allow the initial results grid to render

        image_urls: set[str] = set()
        last_height = driver.execute_script("return document.body.scrollHeight")
        stall_count = 0  # consecutive scrolls with no page growth

        pbar = tqdm(total=max_images, desc=f" Scrolling: {query[:40]}")
        while len(image_urls) < max_images and stall_count < 8:
            # Scroll down
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(self.scroll_pause)

            # Parse page for image URLs
            soup = BeautifulSoup(driver.page_source, "html.parser")
            for img_tag in soup.find_all("img"):
                src = img_tag.get("src", "")
                # Pinterest uses /originals/ for full-res or /736x/ for medium
                if "pinimg.com" in src:
                    # Try to get highest resolution by rewriting thumbnail
                    # size segments to /originals/ (may 404 for some pins;
                    # the downloader tolerates failed fetches).
                    full_url = src.replace("/236x/", "/originals/").replace("/474x/", "/originals/").replace("/736x/", "/originals/")
                    image_urls.add(full_url)

            # Advance the bar by however many *new* URLs this pass found.
            pbar.update(len(image_urls) - pbar.n)

            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                stall_count += 1
            else:
                stall_count = 0
                last_height = new_height

        pbar.close()
        return list(image_urls)[:max_images]
|
| 677 |
+
|
| 678 |
+
def _download_image(self, url: str, save_path: Path) -> bool:
|
| 679 |
+
"""Download a single image, validate, and dedup."""
|
| 680 |
+
try:
|
| 681 |
+
resp = self.session.get(url, timeout=self.download_timeout)
|
| 682 |
+
resp.raise_for_status()
|
| 683 |
+
|
| 684 |
+
img = Image.open(BytesIO(resp.content)).convert("RGB")
|
| 685 |
+
|
| 686 |
+
# Check minimum resolution
|
| 687 |
+
if min(img.size) < self.min_resolution:
|
| 688 |
+
return False
|
| 689 |
+
|
| 690 |
+
# Check duplicate against global corpus cache
|
| 691 |
+
if self.dedup.is_duplicate(img, save_path=str(save_path)):
|
| 692 |
+
return False
|
| 693 |
+
|
| 694 |
+
img.save(save_path, "JPEG", quality=95)
|
| 695 |
+
self.dedup.add_to_disk_cache(str(save_path), img)
|
| 696 |
+
return True
|
| 697 |
+
|
| 698 |
+
except Exception as e:
|
| 699 |
+
logger.debug(f"Failed to download {url}: {e}")
|
| 700 |
+
return False
|
| 701 |
+
|
| 702 |
+
def scrape_category(self, category: str, queries: list[str]) -> int:
    """
    Scrape images for one category/theme.
    Keeps going until TARGET_PER_THEME (1000) is reached.
    Cycles through queries multiple rounds with increasing scroll depth.
    Skips already-downloaded images.

    Args:
        category: Theme name; becomes the subdirectory under ``self.output_dir``.
        queries: Base Pinterest search queries for this theme. From round 2
            onward a rotating modifier ("HD", "professional", ...) is appended
            to surface pins missed by the base query.

    Returns:
        Total number of .jpg files on disk for this category after the run
        (existing plus newly downloaded), or 0 when Selenium is unavailable.
    """
    cat_dir = self.output_dir / category
    cat_dir.mkdir(parents=True, exist_ok=True)

    # Count existing images (skip already-downloaded)
    existing_files = set(f.name for f in cat_dir.glob("*.jpg"))
    existing_count = len(existing_files)
    logger.info(f"Category '{category}': {existing_count} existing images")

    if existing_count >= self.TARGET_PER_THEME:
        logger.info(f" ✓ Already at target ({self.TARGET_PER_THEME}), skipping!")
        return existing_count

    remaining = self.TARGET_PER_THEME - existing_count
    logger.info(f" Need {remaining} more images to reach {self.TARGET_PER_THEME}")

    if not HAS_SELENIUM:
        logger.error("Selenium not available — cannot scrape Pinterest.")
        return 0

    driver = self._create_driver()
    total_downloaded = existing_count
    all_seen_urls: set[str] = set()  # Track all URLs across rounds

    try:
        round_num = 0
        max_rounds = 5  # Try up to 5 rounds of cycling through queries

        while total_downloaded < self.TARGET_PER_THEME and round_num < max_rounds:
            round_num += 1
            round_new = 0
            # Increase scroll depth each round to find deeper content
            scroll_target = 300 + (round_num * 200)

            logger.info(f"\n ── Round {round_num}/{max_rounds} (scroll depth: {scroll_target}) ──")

            for query_idx, query in enumerate(queries):
                if total_downloaded >= self.TARGET_PER_THEME:
                    break

                # Add variation to queries in later rounds
                if round_num > 1:
                    variations = [
                        f"{query} HD",
                        f"{query} professional",
                        f"{query} creative",
                        f"{query} inspiration",
                        f"best {query}",
                    ]
                    # round 2 uses variations[0], round 3 variations[1], ...
                    actual_query = variations[(round_num - 2) % len(variations)]
                else:
                    actual_query = query

                logger.info(f" Query [{query_idx+1}/{len(queries)}]: '{actual_query}'")
                urls = self._scroll_and_collect_urls(driver, actual_query, scroll_target)

                # Filter out already-seen URLs
                new_urls = [u for u in urls if u not in all_seen_urls]
                all_seen_urls.update(urls)
                logger.info(f" Found {len(urls)} URLs ({len(new_urls)} new)")

                for url in tqdm(new_urls, desc=f" Downloading", leave=False):
                    if total_downloaded >= self.TARGET_PER_THEME:
                        break

                    # Deterministic filename from the URL hash so reruns can
                    # recognise and skip previously downloaded pins.
                    fname = hashlib.md5(url.encode()).hexdigest() + ".jpg"
                    save_path = cat_dir / fname

                    # Skip if already downloaded
                    if fname in existing_files or save_path.exists():
                        continue

                    if self._download_image(url, save_path):
                        total_downloaded += 1
                        round_new += 1
                        existing_files.add(fname)

                # Rate-limit between queries
                time.sleep(3)

            logger.info(f" Round {round_num} complete: +{round_new} new images, {total_downloaded} total")

            # If no new images found this round, stop early
            if round_new == 0:
                logger.info(f" No new images found in round {round_num}, moving on.")
                break

    finally:
        # Always release the browser, even on unexpected errors mid-round.
        driver.quit()

    # Re-count from disk rather than trusting the in-memory counter.
    new_count = len(list(cat_dir.glob("*.jpg")))
    logger.info(
        f"\nCategory '{category}': {new_count}/{self.TARGET_PER_THEME} images "
        f"({new_count - existing_count} new this session)"
    )
    return new_count
|
| 804 |
+
|
| 805 |
+
def scrape_all(self, queries_map: dict[str, list[str]] | None = None) -> dict[str, int]:
    """Run :meth:`scrape_category` for every category and collect the counts.

    Falls back to the module-level DEFAULT_QUERIES map when no explicit
    mapping of category -> query list is supplied. Returns a dict of
    category name -> final image count.
    """
    plan = DEFAULT_QUERIES if queries_map is None else queries_map

    totals: dict[str, int] = {}
    for name, query_list in plan.items():
        logger.info(f"\n{'='*60}")
        logger.info(f"Scraping category: {name}")
        logger.info(f"{'='*60}")
        totals[name] = self.scrape_category(name, query_list)

    return totals
|
| 819 |
+
|
| 820 |
+
|
| 821 |
+
|
| 822 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 823 |
+
# Main
|
| 824 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 825 |
+
def main():
    """CLI entry point: parse arguments, run the scraper, print a summary."""
    parser = argparse.ArgumentParser(description="Pinterest Poster Image Scraper")
    parser.add_argument("--config", default="configs/config.yaml", help="Path to config.yaml")
    parser.add_argument("--category", default=None, help="Scrape a single category only")
    parser.add_argument("--target", type=int, default=None, help="Override target image count (default: 1900)")
    args = parser.parse_args()

    config = load_config(args.config)

    # A --target override is applied at class level so every instance sees it.
    if args.target:
        PinterestScraper.TARGET_PER_THEME = args.target
        logger.info(f"🎯 Target count overridden to {args.target} images per category")

    raw_dir = config["paths"]["data"]["raw"]
    scraper = PinterestScraper(config, raw_dir)

    if args.category:
        # Single-category mode; unknown categories get a generic query.
        query_list = DEFAULT_QUERIES.get(args.category, [f"{args.category} poster design"])
        results = {args.category: scraper.scrape_category(args.category, query_list)}
    else:
        results = scraper.scrape_all()

    # Summary table, one row per category plus a grand total.
    divider = "=" * 60
    logger.info("\n" + divider)
    logger.info("SCRAPING SUMMARY")
    logger.info(divider)
    for cat, count in results.items():
        logger.info(f" {cat:20s}: {count:5d} images")
    total = sum(results.values())
    logger.info(f" {'TOTAL':20s}: {total:5d} images")
    logger.info(divider)
|
| 859 |
+
|
| 860 |
+
|
| 861 |
+
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
scripts/pinterest_tuning_scraper.py
ADDED
|
@@ -0,0 +1,368 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import re
|
| 4 |
+
import time
|
| 5 |
+
import random
|
| 6 |
+
import hashlib
|
| 7 |
+
import logging
|
| 8 |
+
import argparse
|
| 9 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from io import BytesIO
|
| 12 |
+
|
| 13 |
+
import yaml
|
| 14 |
+
import requests
|
| 15 |
+
from requests.adapters import HTTPAdapter
|
| 16 |
+
from PIL import Image
|
| 17 |
+
from tqdm import tqdm
|
| 18 |
+
from image_deduplicator import GlobalImageDeduplicator
|
| 19 |
+
from tuning_dataset import CATEGORIES
|
| 20 |
+
|
| 21 |
+
# Selenium (and webdriver_manager) are optional dependencies: when they are
# missing we only warn and set HAS_SELENIUM = False, so the module can still
# be imported for its non-scraping helpers on machines without a browser.
try:
    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By
    from webdriver_manager.chrome import ChromeDriverManager
    HAS_SELENIUM = True
except ImportError:
    HAS_SELENIUM = False
    print("WARNING: selenium/webdriver_manager not installed.")
|
| 31 |
+
|
| 32 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Matches direct pinimg.com image URLs embedded anywhere in the raw page HTML.
# NOTE(review): only .jpg is captured — .png/.webp pins are silently skipped;
# confirm that is intentional.
PINIMG_RE = re.compile(r'https://[a-z0-9]+\.pinimg\.com/[^\s"\'<>]+\.jpg')
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class PinterestTuningScraper:
    """Scrape specific tuning poster images from Pinterest using Selenium.

    A single instance owns a requests.Session (for image downloads), a
    GlobalImageDeduplicator (corpus-wide duplicate rejection), and spins up
    headless undetected-chromedriver instances on demand for scrolling the
    Pinterest search feed.
    """

    # Search endpoint; {query} is substituted with a '+'-joined query string.
    PINTEREST_SEARCH_URL = "https://www.pinterest.com/search/pins/?q={query}"

    def __init__(self, config: dict, output_dir: str, target_per_theme: int = 20):
        """
        Args:
            config: Parsed project config (configs/config.yaml).
            output_dir: Root directory; each subcategory gets a subfolder.
            target_per_theme: Desired image count per subcategory.
        """
        self.config = config
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.target_per_theme = target_per_theme

        # Scraper knobs, with defaults when the config omits them.
        scraping_cfg = config.get("scraping", {}).get("pinterest", {})
        self.scroll_pause = scraping_cfg.get("scroll_pause_seconds", 2.0)
        self.download_timeout = scraping_cfg.get("download_timeout", 15)
        self.min_resolution = scraping_cfg.get("min_resolution", 512)

        # Deduplicator is scoped to the whole data root, not just this run.
        data_root = self.config.get("paths", {}).get("data", {}).get("root", "data")
        self.dedup = GlobalImageDeduplicator(data_dir=data_root)

        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/120.0.0.0 Safari/537.36"
            )
        })
        # Connection pooling for the two pinimg CDN hosts used by downloads.
        adapter = HTTPAdapter(
            pool_connections=16,
            pool_maxsize=16,
            max_retries=1
        )
        self.session.mount("https://i.pinimg.com", adapter)
        self.session.mount("https://v1.pinimg.com", adapter)

    def _create_driver(self):
        """Create and return a configured headless undetected-chromedriver."""
        import undetected_chromedriver as uc
        import random  # NOTE(review): redundant — random is already imported at module level

        opts = uc.ChromeOptions()
        opts.add_argument("--no-sandbox")
        opts.add_argument("--disable-dev-shm-usage")
        opts.add_argument("--disable-gpu")
        opts.add_argument("--window-size=1920,1080")
        opts.add_argument("--mute-audio")
        opts.add_argument("--no-first-run")
        opts.add_argument("--disable-background-networking")

        # version_main pins the driver to the installed Chrome major version.
        driver = uc.Chrome(options=opts, version_main=145, headless=True)
        driver.set_page_load_timeout(30)
        driver.set_script_timeout(10)
        return driver

    def _nuke_modals(self, driver):
        """Best-effort removal of Pinterest's signup/overlay modals via JS.

        NOTE(review): in JavaScript, adjacent string literals do NOT
        concatenate — the two selector lines below would be a JS SyntaxError
        unless a '+' was lost in transit. Because the whole call is wrapped
        in try/except, such a failure would be silent; verify the script
        actually runs.
        """
        try:
            driver.execute_script('''
                document.querySelectorAll(
                    '[data-test-id="giftWrap"],[data-test-id="signup"],'
                    '[data-test-id="unauthModal"],.Modal__overlay'
                ).forEach(e => e.remove());
                document.body.style.overflow = "auto";
                document.documentElement.style.overflow = "auto";
            ''')
        except Exception:
            pass

    def _scroll_and_collect_urls(self, driver, query: str, max_images: int) -> list[str]:
        """Scroll the search feed for *query*, harvesting pinimg URLs.

        Stops when max_images are collected, after 8 consecutive scrolls
        yielding zero new URLs, or past a hard scroll-depth ceiling.
        Returns at most max_images URLs (set order, effectively unordered).
        """
        url = self.PINTEREST_SEARCH_URL.format(query=query.replace(" ", "+"))

        try:
            driver.get(url)
        except Exception:
            # Page-load timeout is tolerated; partial DOM may still have pins.
            pass

        # Wait up to 15s for React to hydrate
        for _ in range(15):
            if "pinimg.com" in driver.page_source:
                break
            time.sleep(1)

        self._nuke_modals(driver)

        image_urls = set()
        last_height = 0
        scroll_step = 400  # smaller steps — triggers lazy loader reliably
        current_pos = 0
        max_scroll_pos = 80000  # ~80 screens worth, Pinterest never goes deeper
        no_new_count = 0  # stall on CONTENT not page height
        height_stall_count = 0

        pbar = tqdm(total=max_images, desc=f" Scrolling: {query[:40]}")

        while len(image_urls) < max_images and no_new_count < 8 and current_pos < max_scroll_pos:
            try:
                current_pos += scroll_step
                driver.execute_script(f"window.scrollTo(0, {current_pos});")
                # Jitter the pause so the scroll cadence looks human.
                time.sleep(self.scroll_pause + random.uniform(0.3, 1.2))
            except Exception:
                no_new_count += 1
                continue

            prev_count = len(image_urls)

            try:
                # Regex over the raw HTML is more robust here than DOM queries
                # against Pinterest's obfuscated markup.
                page_source = driver.page_source
                found = PINIMG_RE.findall(page_source)
                for src in found:
                    if "profile_images" in src or "75x75_RS" in src:
                        continue
                    # 736x resolution keeps download fast but high-quality enough
                    src = (src.replace("/236x/", "/736x/")
                           .replace("/474x/", "/736x/")
                           .replace("/originals/", "/736x/"))
                    image_urls.add(src)
            except Exception:
                pass

            new_found = len(image_urls) - prev_count
            if new_found == 0:
                no_new_count += 1  # count scrolls with ZERO new images
            else:
                no_new_count = 0  # reset whenever new images found

            pbar.update(max(0, len(image_urls) - pbar.n))

            try:
                new_height = driver.execute_script("return document.body.scrollHeight")
                if current_pos >= new_height:
                    # We've scrolled past the rendered page; clear any modal
                    # that may be blocking the lazy loader.
                    self._nuke_modals(driver)
                    if new_height == last_height:
                        height_stall_count += 1
                        if height_stall_count >= 3:
                            # Truly at bottom of page, nothing more to load
                            break
                    else:
                        height_stall_count = 0
                    last_height = new_height
                    current_pos = new_height
            except Exception:
                no_new_count += 1

        pbar.close()
        return list(image_urls)[:max_images]

    def _is_valid_url(self, url: str) -> bool:
        """Reject avatar/profile thumbnails that are never poster content."""
        skip = ["profile_images", "75x75", "30x30", "user_images", "avatars"]
        return not any(s in url for s in skip)

    def _download_image(self, url: str, save_path: Path) -> bool:
        """Download, validate (resolution + dedup) and save one image.

        Returns True on a successful save; False on any failure. The short
        (connect=2s, read=4s) timeout keeps the thread pool from stalling.
        """
        try:
            resp = self.session.get(url, timeout=(2, 4))
            resp.raise_for_status()
            img = Image.open(BytesIO(resp.content)).convert("RGB")
            if min(img.size) < self.min_resolution:
                return False
            if self.dedup.is_duplicate(img, save_path=str(save_path)):
                return False
            img.save(save_path, "JPEG", quality=95)
            self.dedup.add_to_disk_cache(str(save_path), img)
            return True
        except Exception as e:
            logger.debug(f"Failed to download {url}: {e}")
            return False

    def scrape_category(self, subcategory_path: str, queries: list[str]) -> int:
        """Fill one subcategory directory up to ``self.target_per_theme``.

        Cycles the query list, appending style modifiers on later cycles;
        reboots the Chrome driver when a query fails or a session returns
        almost no URLs; downloads candidates with a 16-worker thread pool.

        Returns the final downloaded count for this subcategory (including
        images that already existed on disk).
        """
        cat_dir = self.output_dir / subcategory_path
        cat_dir.mkdir(parents=True, exist_ok=True)

        # Resume support: anything already on disk counts toward the target.
        existing_files = set(f.name for f in cat_dir.glob("*.jpg"))
        existing_count = len(existing_files)
        logger.info(f"Subcategory '{subcategory_path}': {existing_count} existing images")

        if existing_count >= self.target_per_theme:
            logger.info(f" ✓ Already at target ({self.target_per_theme}), skipping!")
            return existing_count

        if not HAS_SELENIUM:
            logger.error("Selenium not available.")
            return 0

        driver = self._create_driver()
        total_downloaded = existing_count
        all_seen_urls = set()
        # Local copy: failed queries are removed from this list below.
        queries = list(queries)

        try:
            query_cycle = 0
            query_fail_counts = {}

            while total_downloaded < self.target_per_theme:
                for query in list(queries):
                    if total_downloaded >= self.target_per_theme:
                        break
                    # Mutate query to break pagination bounds and prioritize design aesthetics
                    active_query = query
                    if query_cycle > 0:
                        modifiers = [" poster layout", " graphic design", " aesthetic", " template", " typography"]
                        active_query = f"{query}{modifiers[query_cycle % len(modifiers)]}"

                    logger.info(f" Query: '{active_query}' (Cycle {query_cycle + 1})")
                    # Over-fetch so dedup/validation losses still leave enough.
                    target_to_fetch = self.target_per_theme * (query_cycle + 2)

                    try:
                        urls = self._scroll_and_collect_urls(driver, active_query, target_to_fetch)
                        query_fail_counts[query] = 0
                    except Exception as scroll_err:
                        logger.warning(f" WebDriver failed/timed out on '{query}': {scroll_err}")
                        query_fail_counts[query] = query_fail_counts.get(query, 0) + 1

                        # NOTE(review): the threshold of 1 means any single
                        # failure removes the query permanently, so the
                        # "retrying" reboot below never retries the same
                        # query — confirm that is the intended policy.
                        if query_fail_counts[query] >= 1:
                            logger.error(f" Skipping query '{query}' permanently.")
                            queries = [q for q in queries if q != query]
                            if not queries:
                                logger.error(" All queries failed. Breaking out of category.")
                                break

                        logger.warning(" Rebooting Chrome driver and retrying...")
                        time.sleep(random.uniform(3, 6))
                        try:
                            driver.quit()
                        except Exception:
                            pass
                        driver = self._create_driver()
                        continue

                    # Reboot driver if session returned near-zero results (blacklisted)
                    if len(urls) < 10 and total_downloaded < self.target_per_theme:
                        logger.warning(" Session returned <10 URLs — rebooting driver.")
                        try:
                            driver.quit()
                        except Exception:
                            pass
                        time.sleep(random.uniform(3, 6))
                        driver = self._create_driver()
                    new_urls = [u for u in urls if u not in all_seen_urls]
                    all_seen_urls.update(urls)

                    # FIX 7: parallel downloads — 16 workers instead of sequential
                    needed = self.target_per_theme - total_downloaded
                    # 4x over-provisioning: many candidates fail validation.
                    candidates = [
                        u for u in new_urls
                        if self._is_valid_url(u)
                        and f"tuning_{hashlib.md5(u.encode()).hexdigest()[:12]}.jpg"
                        not in existing_files
                    ][:needed * 4]

                    def _dl(u, _cat_dir=cat_dir):
                        # Deterministic name from the URL hash; skip re-downloads.
                        fname = f"tuning_{hashlib.md5(u.encode()).hexdigest()[:12]}.jpg"
                        sp = _cat_dir / fname
                        if sp.exists():
                            return None
                        return (fname, self._download_image(u, sp))

                    with ThreadPoolExecutor(max_workers=16) as pool:
                        futures = {pool.submit(_dl, u): u for u in candidates}
                        pbar_dl = tqdm(total=min(needed, len(candidates)),
                                       desc=" Downloading", leave=False)
                        for fut in as_completed(futures):
                            if total_downloaded >= self.target_per_theme:
                                # Target hit mid-flight: cancel queued work.
                                pool.shutdown(wait=True, cancel_futures=True)
                                break
                            result = fut.result()
                            if result:
                                fname, ok = result
                                if ok:
                                    total_downloaded += 1
                                    existing_files.add(fname)
                                    pbar_dl.update(1)
                        pbar_dl.close()

                if total_downloaded < self.target_per_theme:
                    if not queries:
                        break
                    logger.warning(
                        f" Only at {total_downloaded}/{self.target_per_theme}. "
                        f"Cycling queries again and scrolling deeper."
                    )
                    query_cycle += 1
                    max_cycles = max(5, len(queries))  # exhaust full query pool
                    if query_cycle >= max_cycles:
                        logger.error(
                            f" Exhausted all {max_cycles} query cycles. "
                            f"Stuck at {total_downloaded}/{self.target_per_theme}. Breaking."
                        )
                        break

        finally:
            # Always release the browser, even on unexpected errors.
            try:
                driver.quit()
            except Exception:
                pass

        logger.info(f" ✓ Downloaded {total_downloaded} images for {subcategory_path}.")
        return total_downloaded
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def load_config(config_path: str = "configs/config.yaml") -> dict:
    """Read the project YAML configuration file and return it as a dict."""
    with open(config_path, "r", encoding="utf-8") as fh:
        parsed = yaml.safe_load(fh)
    return parsed
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
def main():
    """CLI entry point for the tuning-dataset Pinterest scraper."""
    parser = argparse.ArgumentParser(description="Tuning Dataset Pinterest Scraper")
    parser.add_argument("--target", default="data/tuning", help="Root directory for tuning data")
    parser.add_argument("--per-category", type=int, default=100, help="Images per subcategory")
    args = parser.parse_args()

    config = load_config()
    target_dir = Path(args.target)

    logger.info("🚀 Starting Pinterest Tuning Scraper")
    logger.info(f"🎯 Target Count: {args.per_category} images per subcategory")

    scraper = PinterestTuningScraper(
        config,
        output_dir=str(target_dir),
        target_per_theme=args.per_category,
    )

    # One pass per subcategory; a failure in one never aborts the rest.
    for subcat, queries in CATEGORIES.items():
        logger.info(f"\n{'='*60}")
        logger.info(f"Processing: {subcat}")
        logger.info(f"{'='*60}")
        try:
            total = scraper.scrape_category(subcat, queries)
            logger.info(f"✅ Finished {subcat}: {total} total images")
        except Exception as e:
            logger.error(f"❌ Failed processing {subcat}: {e}")
        # Brief pause between categories to stay polite to Pinterest.
        time.sleep(2)

    logger.info("\n🎉 All tuning categories processed safely without duplicates!")
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
scripts/poster_compositor.py
ADDED
|
@@ -0,0 +1,507 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
poster_compositor.py
|
| 4 |
+
====================
|
| 5 |
+
Smart Poster Compositor — Text Placement Engine
|
| 6 |
+
|
| 7 |
+
Renders PIL typography on SDXL-generated artwork with three placement modes:
|
| 8 |
+
|
| 9 |
+
"auto" Scans the image for the quietest region (fewest edges) and
|
| 10 |
+
places text there automatically.
|
| 11 |
+
manual Pass text_position="top" | "center" | "bottom" to pin the text
|
| 12 |
+
block to a fixed zone — useful when you have already reviewed
|
| 13 |
+
the artwork and know where the clean space is.
|
| 14 |
+
"none" Returns the artwork untouched (useful for debugging raw art).
|
| 15 |
+
|
| 16 |
+
A feathered dark scrim is applied only under the text block when scrim=True.
|
| 17 |
+
Set scrim=False for bright or vivid artworks where a dark overlay would ruin
|
| 18 |
+
the visual — text rendering already includes drop shadows and strokes for
|
| 19 |
+
standalone legibility.
|
| 20 |
+
|
| 21 |
+
Styles:
|
| 22 |
+
modern Centered Montserrat, accent rules, info pill.
|
| 23 |
+
bold Left-aligned heavy display, side accent bars, right-aligned organiser.
|
| 24 |
+
elegant Centered Playfair Display, fine horizontal rules.
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
from __future__ import annotations
|
| 28 |
+
|
| 29 |
+
import os
|
| 30 |
+
import textwrap
|
| 31 |
+
from typing import Literal
|
| 32 |
+
|
| 33 |
+
import numpy as np
|
| 34 |
+
import requests
|
| 35 |
+
from PIL import Image, ImageDraw, ImageFilter, ImageFont
|
| 36 |
+
|
| 37 |
+
# ---------------------------------------------------------------------------
|
| 38 |
+
# Configuration
|
| 39 |
+
# ---------------------------------------------------------------------------
|
| 40 |
+
|
| 41 |
+
# Local cache directory for downloaded typefaces, relative to this script.
_FONTS_DIR = os.path.normpath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "assets", "fonts")
)

# Download sources for every typeface the compositor renders with.
# NOTE(review): "PlayfairDisplay-Bold" points at the variable-weight font file
# and "PlayfairDisplay-Regular" at the *Italic* variable file; truetype-loading
# a variable font yields its default instance, so the logical names may not
# match the rendered weight/slant. Confirm intent.
_FONT_URLS: dict[str, str] = {
    "Montserrat-Regular":
        "https://github.com/JulietaUla/Montserrat/raw/master/fonts/ttf/Montserrat-Regular.ttf",
    "Montserrat-Medium":
        "https://github.com/JulietaUla/Montserrat/raw/master/fonts/ttf/Montserrat-Medium.ttf",
    "Montserrat-Bold":
        "https://github.com/JulietaUla/Montserrat/raw/master/fonts/ttf/Montserrat-Bold.ttf",
    "Montserrat-ExtraBold":
        "https://github.com/JulietaUla/Montserrat/raw/master/fonts/ttf/Montserrat-ExtraBold.ttf",
    "PlayfairDisplay-Bold":
        "https://github.com/google/fonts/raw/main/ofl/playfairdisplay/PlayfairDisplay%5Bwght%5D.ttf",
    "PlayfairDisplay-Regular":
        "https://github.com/google/fonts/raw/main/ofl/playfairdisplay/PlayfairDisplay-Italic%5Bwght%5D.ttf",
}

# Allowed values for the compositor's public style/position parameters.
Style = Literal["modern", "bold", "elegant"]
Position = Literal["auto", "top", "center", "bottom", "none"]

# Vertical centre of the text block as a fraction of image height
_POSITION_RATIOS: dict[str, float] = {
    "top": 0.14,  # tight to the very top — above most subjects
    "center": 0.50,
    "bottom": 0.80,
}

# Scrim intensity per style — bold needs more coverage to hide busy artwork
_SCRIM_INTENSITY: dict[str, float] = {
    "bold": 0.90,
    "modern": 0.78,
    "elegant": 0.75,
}
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# ---------------------------------------------------------------------------
|
| 79 |
+
# Font helpers
|
| 80 |
+
# ---------------------------------------------------------------------------
|
| 81 |
+
|
| 82 |
+
def ensure_fonts() -> str:
    """Make sure every required font file exists in the local assets cache.

    Fonts missing from ``_FONTS_DIR`` are fetched from their ``_FONT_URLS``
    source. Download failures are reported but never raised, so missing
    fonts simply fall back at load time. Returns the fonts directory path.
    """
    os.makedirs(_FONTS_DIR, exist_ok=True)
    for font_name, source_url in _FONT_URLS.items():
        target = os.path.join(_FONTS_DIR, f"{font_name}.ttf")
        if os.path.exists(target):
            continue  # already cached — nothing to do
        print(f" Downloading font: {font_name} ...")
        try:
            payload = requests.get(source_url, timeout=30)
            payload.raise_for_status()
            with open(target, "wb") as out:
                out.write(payload.content)
        except Exception as exc:
            print(f" Warning — could not download {font_name}: {exc}")
    return _FONTS_DIR
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def load_font(name: str, size: int) -> ImageFont.FreeTypeFont:
    """Resolve a logical font name to a PIL font at *size* points.

    Prefers the cached asset file; otherwise tries common system fonts and,
    as a last resort, PIL's built-in bitmap font (which ignores *size*).
    """
    asset_path = os.path.join(_FONTS_DIR, f"{name}.ttf")
    if os.path.exists(asset_path):
        # A present asset font is expected to load; errors surface loudly.
        return ImageFont.truetype(asset_path, size)
    for system_font in ("DejaVuSans.ttf", "arial.ttf"):
        try:
            return ImageFont.truetype(system_font, size)
        except OSError:
            pass
    # Last resort: PIL's built-in bitmap font (fixed size).
    return ImageFont.load_default()
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
# ---------------------------------------------------------------------------
|
| 114 |
+
# Text measurement
|
| 115 |
+
# ---------------------------------------------------------------------------
|
| 116 |
+
|
| 117 |
+
def _text_size(text: str, font: ImageFont.FreeTypeFont) -> tuple[int, int]:
    """Measure *text* rendered in *font*; return (width, height) in pixels."""
    left, top, right, bottom = font.getbbox(text)
    return right - left, bottom - top
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _wrap_title(title: str, style: Style) -> tuple[list[str], int]:
    """Return (wrapped lines, font size) for the title based on length and style."""
    n = len(title)
    # Shorter titles get larger type; longer ones shrink and wrap narrower.
    if style == "bold":
        if n < 15:
            size, width = 70, 14
        elif n < 25:
            size, width = 56, 18
        else:
            size, width = 44, 18
    elif style == "elegant":
        if n < 20:
            size, width = 56, 18
        elif n < 30:
            size, width = 44, 22
        else:
            size, width = 36, 22
    else:  # modern (default)
        if n < 20:
            size, width = 64, 20
        elif n < 30:
            size, width = 50, 24
        else:
            size, width = 40, 24

    # Elegant keeps the author's casing; the other styles shout in caps.
    shown = title if style == "elegant" else title.upper()
    return textwrap.wrap(shown, width=width), size
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
# ---------------------------------------------------------------------------
|
| 140 |
+
# Quiet-zone detection (used only when text_position="auto")
|
| 141 |
+
# ---------------------------------------------------------------------------
|
| 142 |
+
|
| 143 |
+
def _score_bands(image: Image.Image, n: int = 5) -> list[tuple[int, int, int, float]]:
    """Score horizontal bands by edge density. Returns list sorted quietest-first.

    The top 15 % of the image is always excluded — that space is reserved
    for organiser branding and top chrome elements.
    """
    _, height = image.size
    edge_map = np.array(
        image.convert("L").filter(ImageFilter.FIND_EDGES), dtype=np.float32
    )
    band_height = height // n
    reserved = int(height * 0.15)  # header zone, never eligible for text

    scored: list[tuple[int, int, int, float]] = []
    for idx in range(n):
        top = idx * band_height
        bottom = min((idx + 1) * band_height, height)
        if bottom <= reserved:
            # Band sits entirely in the reserved header — force it last.
            density = 9999.0
        else:
            # Clip the band below the reserved zone if it straddles it.
            density = float(np.mean(edge_map[max(top, reserved):bottom, :]))
        scored.append((idx, top, bottom, density))

    return sorted(scored, key=lambda band: band[3])
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def _find_text_region(
    image: Image.Image,
    block_height: int,
    n_bands: int = 5,
) -> tuple[int, int, int, str]:
    """Return (y_center, y_top, y_bottom, hint) for the quietest usable region."""
    _, img_h = image.size
    _, top, bottom, _ = _score_bands(image, n_bands)[0]  # quietest band

    # Grow the band symmetrically when the text block does not fit inside it.
    shortfall = block_height - (bottom - top)
    if shortfall > 0:
        top = max(0, top - shortfall // 2)
        bottom = min(img_h, bottom + shortfall // 2)

    mid = (top + bottom) // 2
    fraction = mid / img_h
    if fraction < 0.33:
        hint = "top"
    elif fraction > 0.66:
        hint = "bottom"
    else:
        hint = "center"
    return mid, top, bottom, hint
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
# ---------------------------------------------------------------------------
|
| 192 |
+
# Localized dark scrim (feathered, only under the text block)
|
| 193 |
+
# ---------------------------------------------------------------------------
|
| 194 |
+
|
| 195 |
+
def _apply_scrim(
    image: Image.Image,
    y_top: int,
    y_bottom: int,
    intensity: float = 0.78,
) -> Image.Image:
    """Burn a soft dark gradient over *image* between y_top and y_bottom ONLY.

    60-pixel feathered edges ensure the scrim blends invisibly into the
    surrounding artwork. Nothing outside the text region is darkened.
    """
    width, height = image.size
    overlay = Image.new("RGBA", (width, height), (0, 0, 0, 0))
    pen = ImageDraw.Draw(overlay)

    feather = 60
    ramp_start = max(0, y_top - feather)
    ramp_end = min(height, y_bottom + feather)

    for row in range(ramp_start, ramp_end):
        if row < y_top:
            # Fading in across the top feather zone.
            weight = (row - ramp_start) / max(1, y_top - ramp_start)
        elif row > y_bottom:
            # Fading out across the bottom feather zone.
            weight = 1.0 - (row - y_bottom) / max(1, ramp_end - y_bottom)
        else:
            weight = 1.0  # fully inside the text band
        opacity = min(int(200 * weight * intensity), 215)
        pen.line([(0, row), (width, row)], fill=(0, 0, 0, opacity))

    return Image.alpha_composite(image.convert("RGBA"), overlay).convert("RGB")
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
# ---------------------------------------------------------------------------
|
| 228 |
+
# Text-rendering primitives
|
| 229 |
+
# ---------------------------------------------------------------------------
|
| 230 |
+
|
| 231 |
+
def _shadowed(
    draw: ImageDraw.ImageDraw,
    xy: tuple[int, int],
    text: str,
    font: ImageFont.FreeTypeFont,
    fill: str = "#FFFFFF",
    shadow_color: str = "#000000",
    shadow_offset: int = 4,
    anchor: str = "lt",
) -> None:
    """Render text with a layered drop shadow and thin stroke for legibility."""
    base_x, base_y = xy
    # Two shadow passes — increasing offset, decreasing alpha — give depth.
    for multiplier, alpha in ((1, 220), (2, 100)):
        offset = shadow_offset * multiplier
        draw.text(
            (base_x + offset, base_y + offset),
            text, font=font, fill=(0, 0, 0, alpha), anchor=anchor,
        )
    # Foreground pass with a 2 px stroke for contrast on any background.
    draw.text(
        xy, text, font=font, fill=fill,
        stroke_width=2, stroke_fill=shadow_color, anchor=anchor,
    )
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def _pill(
    draw: ImageDraw.ImageDraw,
    xy: tuple[int, int],
    text: str,
    font: ImageFont.FreeTypeFont,
    fill: str = "#FFFFFF",
    bg: tuple[int, ...] = (0, 0, 0, 160),
    padding: int = 12,
    anchor: str = "lt",
) -> None:
    """Render text on a semi-transparent rounded-rectangle background."""
    left, top, right, bottom = font.getbbox(text, anchor=anchor)
    x, y = xy
    # Background box: the anchored text bbox expanded by `padding` on all sides.
    box = [
        (x + left - padding, y + top - padding),
        (x + right + padding, y + bottom + padding),
    ]
    draw.rounded_rectangle(box, radius=8, fill=bg)
    draw.text(xy, text, font=font, fill=fill, anchor=anchor)
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
# ---------------------------------------------------------------------------
|
| 273 |
+
# Block height estimator
|
| 274 |
+
# ---------------------------------------------------------------------------
|
| 275 |
+
|
| 276 |
+
def _estimate_block_height(
    title: str, subtitle: str, date: str, venue: str, organizer: str, style: Style,
) -> int:
    """Approximate the vertical pixels the rendered text block will occupy."""
    lines, size = _wrap_title(title, style)
    face = "PlayfairDisplay-Bold" if style == "elegant" else "Montserrat-ExtraBold"
    title_font = load_font(face, size)

    height = 24  # base padding around the title
    for line in lines:
        height += _text_size(line, title_font)[1] + 12
    # Fixed allowances for each optional element that will be drawn.
    height += 34 if subtitle else 0
    height += 28 if date else 0
    height += 28 if venue else 0
    height += 36 if organizer else 0
    return height + 40
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
# ---------------------------------------------------------------------------
|
| 292 |
+
# Layout: MODERN
|
| 293 |
+
# ---------------------------------------------------------------------------
|
| 294 |
+
|
| 295 |
+
def _layout_modern(
    draw: ImageDraw.ImageDraw, w: int, h: int,
    title: str, subtitle: str, date: str, venue: str,
    organizer: str, accent: str, start_y: int,
) -> None:
    """Centered layout with accent bars top and bottom.

    Draw order: top accent bar → organiser → title lines → short accent
    rule → subtitle → combined date/venue pill → bottom accent bar.
    `start_y` is the top of the title block, pre-computed by the caller.
    """
    cx = w // 2  # horizontal centre; all text uses the "mt" anchor
    gap = 16     # vertical spacing between stacked elements

    # Full-width accent bar along the very top edge.
    draw.rectangle([(0, 0), (w, 5)], fill=accent)
    if organizer:
        font_org = load_font("Montserrat-Medium", 22)
        _shadowed(draw, (cx, 28), organizer.upper(), font_org, anchor="mt")

    # Title: wrapped lines stacked downward from start_y.
    cursor = start_y
    lines, size = _wrap_title(title, "modern")
    font_title = load_font("Montserrat-ExtraBold", size)
    for line in lines:
        _shadowed(draw, (cx, cursor), line, font_title, anchor="mt")
        cursor += _text_size(line, font_title)[1] + 12

    # Short accent rule separating the title from the subtitle.
    cursor += 6
    draw.rectangle([(cx - 90, cursor), (cx + 90, cursor + 3)], fill=accent)
    cursor += 3 + gap

    if subtitle:
        font_sub = load_font("PlayfairDisplay-Regular", 26)
        _shadowed(draw, (cx, cursor), subtitle, font_sub, fill=accent, anchor="mt")
        cursor += _text_size(subtitle, font_sub)[1] + gap

    # Date and venue share a single translucent pill, joined with a bullet.
    parts: list[str] = []
    if date: parts.append(f"📅 {date}")
    if venue: parts.append(f"📍 {venue}")
    if parts:
        font_info = load_font("Montserrat-Regular", 18)
        _pill(draw, (cx, cursor), " • ".join(parts), font_info,
              bg=(0, 0, 0, 170), anchor="mt")

    # Matching accent bar along the bottom edge.
    draw.rectangle([(0, h - 5), (w, h)], fill=accent)
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
# ---------------------------------------------------------------------------
|
| 337 |
+
# Layout: BOLD
|
| 338 |
+
# ---------------------------------------------------------------------------
|
| 339 |
+
|
| 340 |
+
def _layout_bold(
    draw: ImageDraw.ImageDraw, w: int, h: int,
    title: str, subtitle: str, date: str, venue: str,
    organizer: str, accent: str, start_y: int,
) -> None:
    """Left-aligned heavy display. Organiser pill pinned top-right.

    Draw order: left/right accent edges → organiser pill (top-right) →
    title lines → subtitle → date → venue, all flush against LEFT.
    `start_y` is the top of the title block, pre-computed by the caller.
    """
    LEFT = 50  # left margin for the whole text column
    gap = 18   # vertical spacing between stacked elements

    # Thin accent strips down both vertical edges of the poster.
    draw.rectangle([(0, 0), (6, h)], fill=accent)
    draw.rectangle([(w - 6, 0), (w, h)], fill=accent)

    # Organiser — top-right so it never clashes with left-aligned title
    if organizer:
        font_org = load_font("Montserrat-Bold", 18)
        _pill(draw, (w - LEFT, 28), organizer.upper(), font_org,
              fill=accent, bg=(0, 0, 0, 200), padding=10, anchor="rt")

    # Title: heavy uppercase lines with a deeper (offset 5) shadow.
    cursor = start_y
    lines, size = _wrap_title(title, "bold")
    font_title = load_font("Montserrat-ExtraBold", size)
    for line in lines:
        _shadowed(draw, (LEFT, cursor), line, font_title, shadow_offset=5)
        cursor += _text_size(line, font_title)[1] + 8
    cursor += gap

    if subtitle:
        font_sub = load_font("Montserrat-Bold", 24)
        _shadowed(draw, (LEFT, cursor), subtitle.upper(), font_sub, fill=accent)
        cursor += _text_size(subtitle.upper(), font_sub)[1] + gap

    # Date and venue on separate lines in muted grey.
    font_info = load_font("Montserrat-Regular", 20)
    if date:
        _shadowed(draw, (LEFT, cursor), f"📅 {date}", font_info, fill="#DDDDDD")
        cursor += _text_size(f"📅 {date}", font_info)[1] + 10
    if venue:
        _shadowed(draw, (LEFT, cursor), f"📍 {venue}", font_info, fill="#DDDDDD")
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
# ---------------------------------------------------------------------------
|
| 380 |
+
# Layout: ELEGANT
|
| 381 |
+
# ---------------------------------------------------------------------------
|
| 382 |
+
|
| 383 |
+
def _layout_elegant(
    draw: ImageDraw.ImageDraw, w: int, h: int,
    title: str, subtitle: str, date: str, venue: str,
    organizer: str, accent: str, start_y: int,
) -> None:
    """Centered serif layout with fine horizontal rules.

    Draw order: top rule → organiser (mixed case, no shouting) → second
    rule → title lines → thin divider → subtitle → date → venue pill →
    double rule at the bottom. `start_y` is the top of the title block.
    """
    cx = w // 2     # horizontal centre; text uses the "mt" anchor
    rule_w = 160    # half-width of the decorative horizontal rules
    gap = 18        # vertical spacing between stacked elements

    # Framing rules around the organiser line near the top.
    draw.rectangle([(cx - rule_w, 46), (cx + rule_w, 48)], fill=accent)
    if organizer:
        font_org = load_font("Montserrat-Medium", 20)
        _shadowed(draw, (cx, 62), organizer, font_org, anchor="mt")
        draw.rectangle([(cx - rule_w, 94), (cx + rule_w, 96)], fill=accent)

    # Title: serif display with a softer, smaller shadow than other styles.
    cursor = start_y
    lines, size = _wrap_title(title, "elegant")
    font_title = load_font("PlayfairDisplay-Bold", size)
    for line in lines:
        _shadowed(draw, (cx, cursor), line, font_title,
                  shadow_color="#1A1A1A", shadow_offset=3, anchor="mt")
        cursor += _text_size(line, font_title)[1] + 14

    # Hairline divider between title and subtitle.
    cursor += 8
    draw.rectangle([(cx - 60, cursor), (cx + 60, cursor + 1)], fill=accent)
    cursor += 1 + gap

    if subtitle:
        font_sub = load_font("PlayfairDisplay-Regular", 26)
        _shadowed(draw, (cx, cursor), subtitle, font_sub, fill=accent, anchor="mt")
        cursor += _text_size(subtitle, font_sub)[1] + gap

    # Date as spaced capitals; venue inside a translucent pill.
    font_info = load_font("Montserrat-Regular", 17)
    if date:
        _shadowed(draw, (cx, cursor), date.upper(), font_info,
                  fill="#E8E8E8", anchor="mt")
        cursor += _text_size(date.upper(), font_info)[1] + 8
    if venue:
        _pill(draw, (cx, cursor), venue, font_info,
              fill="#FFFFFF", bg=(0, 0, 0, 150), padding=10, anchor="mt")

    # Double rule as a closing flourish at the bottom.
    draw.rectangle([(cx - rule_w, h - 48), (cx + rule_w, h - 46)], fill=accent)
    draw.rectangle([(cx - rule_w, h - 36), (cx + rule_w, h - 34)], fill=accent)
|
| 427 |
+
|
| 428 |
+
|
| 429 |
+
# ---------------------------------------------------------------------------
|
| 430 |
+
# Public API
|
| 431 |
+
# ---------------------------------------------------------------------------
|
| 432 |
+
|
| 433 |
+
# Dispatch table: style name → layout renderer. composite_poster() falls
# back to _layout_modern for any style not listed here.
_LAYOUTS = {
    "modern": _layout_modern,
    "bold": _layout_bold,
    "elegant": _layout_elegant,
}
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
def composite_poster(
    artwork: Image.Image,
    title: str,
    subtitle: str = "",
    date: str = "",
    venue: str = "",
    organizer: str = "",
    accent_color: str = "#FFD700",
    style: Style = "modern",
    text_position: Position = "auto",
    scrim: bool = True,
) -> Image.Image:
    """Composite event text onto an SDXL artwork image.

    Args:
        artwork: Raw SDXL-generated PIL Image.
        title: Primary event name (required).
        subtitle: Short tagline or theme (optional).
        date: Human-readable date string (optional).
        venue: Location or venue name (optional).
        organizer: Host shown at the top of the poster (optional).
        accent_color: Hex colour for decorative elements and rules.
        style: "modern" | "bold" | "elegant"
        text_position: "auto" — detect quietest region automatically.
                       "top" — pin text block near the top (y=14%).
                       "center" — pin to vertical centre (y=50%).
                       "bottom" — pin to bottom area (y=80%).
                       "none" — return artwork unchanged.
        scrim: True — feathered dark gradient under text block only.
               Intensity is style-aware: bold=0.90, others lower.
               False — no scrim; rely on shadow/stroke for legibility.
               Use for bright, vivid, or light-bg artworks.

    Returns:
        Composited PIL Image (RGB).
    """
    if text_position == "none":
        return artwork.copy().convert("RGB")

    ensure_fonts()

    # Work on a copy so the caller's artwork is never mutated.
    img = artwork.copy().convert("RGB")
    w, h = img.size

    # Estimate how tall the rendered text stack will be for this style.
    block_h = _estimate_block_height(title, subtitle, date, venue, organizer, style)

    # Resolve the vertical centre: a fixed ratio, or the edge-density quiet zone.
    if text_position in _POSITION_RATIOS:
        y_center = int(h * _POSITION_RATIOS[text_position])
    else:
        y_center, _, _, _ = _find_text_region(img, block_h)

    pad = 44  # breathing room above/below the text block
    if scrim:
        scrim_top = max(0, y_center - block_h // 2 - pad)
        scrim_bottom = min(h, y_center + block_h // 2 + pad)
        scrim_intensity = _SCRIM_INTENSITY.get(style, 0.78)
        img = _apply_scrim(img, scrim_top, scrim_bottom, intensity=scrim_intensity)

    # Text is drawn on a transparent overlay so alpha shadows blend correctly.
    overlay = Image.new("RGBA", (w, h), (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    # Keep the block on-canvas even if the quiet zone sits very high.
    text_start_y = max(pad, y_center - block_h // 2)
    # Unknown styles fall back to the modern layout.
    _LAYOUTS.get(style, _layout_modern)(
        draw, w, h, title, subtitle, date, venue, organizer, accent_color, text_start_y
    )

    result = Image.alpha_composite(img.convert("RGBA"), overlay)
    return result.convert("RGB")
|
scripts/quality_filter.py
ADDED
|
@@ -0,0 +1,562 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Image Quality Filter (GPU-Accelerated)
|
| 4 |
+
Filters raw scraped images based on resolution, sharpness, aspect ratio,
|
| 5 |
+
file size, and deduplication. Uses GPU for batch sharpness and color analysis.
|
| 6 |
+
Outputs high-quality images to data/processed/.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import sys
|
| 11 |
+
import json
|
| 12 |
+
import shutil
|
| 13 |
+
import logging
|
| 14 |
+
import argparse
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from collections import defaultdict
|
| 17 |
+
|
| 18 |
+
import yaml
|
| 19 |
+
import cv2
|
| 20 |
+
import numpy as np
|
| 21 |
+
import imagehash
|
| 22 |
+
import torch
|
| 23 |
+
import torch.nn.functional as F
|
| 24 |
+
from PIL import Image
|
| 25 |
+
from tqdm import tqdm
|
| 26 |
+
|
| 27 |
+
# ─── SM120 (Blackwell) CUDA optimizations ───────────────────────────────────
# Enable TF32 for matmul and cuDNN kernels. This trades a small amount of
# float32 precision for speed — acceptable here since the GPU work is only
# variance/std estimation for quality filtering, not model training.
if torch.cuda.is_available():
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
|
| 31 |
+
|
| 32 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 33 |
+
# Logging
|
| 34 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 35 |
+
# Root logger: timestamped INFO lines shared by every stage of this script.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger(__name__)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 43 |
+
# Config
|
| 44 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 45 |
+
def load_config(config_path: str = "configs/config.yaml") -> dict:
    """Parse the project YAML configuration file and return it as a dict."""
    with open(config_path, mode="r", encoding="utf-8") as cfg_file:
        return yaml.safe_load(cfg_file)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 51 |
+
# GPU-Accelerated Quality Checker
|
| 52 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 53 |
+
class ImageQualityChecker:
    """
    Evaluate image quality using GPU-accelerated sharpness and color analysis.
    Falls back to CPU if no CUDA device is available.

    An image passes only if it clears, in order: file size, readability,
    resolution, aspect ratio, Laplacian sharpness, and colour variance.
    """

    # 3x3 Laplacian kernel, shaped [out_ch=1, in_ch=1, 3, 3] for F.conv2d.
    LAPLACIAN_KERNEL = torch.tensor(
        [[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=torch.float32
    ).unsqueeze(0).unsqueeze(0)

    def __init__(
        self,
        min_resolution: int = 512,
        min_sharpness: float = 50.0,
        min_aspect_ratio: float = 0.4,
        max_aspect_ratio: float = 2.5,
        min_file_size_kb: int = 20,
        max_file_size_mb: int = 50,
        device: str = "auto",
    ):
        """Store thresholds and pin the Laplacian kernel to the chosen device.

        Args:
            min_resolution: Minimum allowed shorter-side length in pixels.
            min_sharpness: Minimum Laplacian variance (lower = blurrier).
            min_aspect_ratio: Minimum allowed width/height ratio.
            max_aspect_ratio: Maximum allowed width/height ratio.
            min_file_size_kb: Reject files smaller than this (likely thumbnails).
            max_file_size_mb: Reject files larger than this.
            device: "auto" picks CUDA when available, else CPU; otherwise
                an explicit torch device string such as "cuda:0" or "cpu".
        """
        self.min_resolution = min_resolution
        self.min_sharpness = min_sharpness
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        self.min_file_size_bytes = min_file_size_kb * 1024
        self.max_file_size_bytes = max_file_size_mb * 1024 * 1024

        # GPU setup
        if device == "auto":
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            self.device = torch.device(device)

        # Transfer the kernel once so per-image checks avoid repeated copies.
        self._kernel = self.LAPLACIAN_KERNEL.to(self.device)
        logger.info(f"Quality checker using device: {self.device}")

    def _gpu_sharpness(self, img_array: np.ndarray) -> float:
        """Return sharpness as the variance of the Laplacian response.

        Args:
            img_array: RGB image as an HxWx3 uint8 array.
        """
        # Convert to grayscale
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

        # Move to GPU as torch tensor, shaped [1, 1, H, W] for conv2d
        tensor = torch.from_numpy(gray.astype(np.float32)).unsqueeze(0).unsqueeze(0)
        tensor = tensor.to(self.device)

        # Apply Laplacian convolution on GPU
        laplacian = F.conv2d(tensor, self._kernel, padding=1)
        sharpness = laplacian.var().item()

        return sharpness

    def _gpu_color_std(self, img_array: np.ndarray) -> float:
        """Std-dev over all pixels/channels — low values mean a near-flat image."""
        tensor = torch.from_numpy(img_array.astype(np.float32)).to(self.device)
        return tensor.std().item()

    def check(self, image_path: Path) -> tuple[bool, dict]:
        """
        Check image quality. Returns (passed, metrics_dict).
        Sharpness and color checks run on GPU.

        The metrics dict always contains "path", "passed" and "reason"
        (None when passed). Other keys are filled progressively, so a
        failed image only carries metrics computed before the failing check.
        """
        metrics = {
            "path": str(image_path),
            "passed": False,
            "reason": None,
        }

        # File size check (CPU — trivial)
        file_size = image_path.stat().st_size
        metrics["file_size_bytes"] = file_size
        if file_size < self.min_file_size_bytes:
            metrics["reason"] = "file_too_small"
            return False, metrics
        if file_size > self.max_file_size_bytes:
            metrics["reason"] = "file_too_large"
            return False, metrics

        # Load image
        try:
            img = Image.open(image_path).convert("RGB")
        except Exception:
            metrics["reason"] = "unreadable"
            return False, metrics

        w, h = img.size
        metrics["width"] = w
        metrics["height"] = h

        # Resolution check (CPU — trivial)
        if min(w, h) < self.min_resolution:
            metrics["reason"] = "low_resolution"
            return False, metrics

        # Aspect ratio check (CPU — trivial)
        aspect = w / h
        metrics["aspect_ratio"] = round(aspect, 3)
        if aspect < self.min_aspect_ratio or aspect > self.max_aspect_ratio:
            metrics["reason"] = "bad_aspect_ratio"
            return False, metrics

        img_array = np.array(img)

        # Sharpness check (GPU-accelerated Laplacian)
        try:
            sharpness = self._gpu_sharpness(img_array)
            metrics["sharpness"] = round(sharpness, 2)
            if sharpness < self.min_sharpness:
                metrics["reason"] = "too_blurry"
                return False, metrics
        except Exception:
            metrics["reason"] = "sharpness_check_failed"
            return False, metrics

        # Color variance check (GPU-accelerated)
        std = self._gpu_color_std(img_array)
        metrics["color_std"] = round(float(std), 2)
        if std < 15.0:
            metrics["reason"] = "too_uniform"
            return False, metrics

        metrics["passed"] = True
        return True, metrics

    def check_batch(self, image_paths: list[Path]) -> list[tuple[bool, dict]]:
        """
        Run check() over many images, preserving input order.

        NOTE(review): despite the name, this currently processes images one
        at a time — there is no cross-image GPU batching in this method.
        """
        results = []
        for path in image_paths:
            results.append(self.check(path))
        return results
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 190 |
+
# Deduplicator
|
| 191 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 192 |
+
class Deduplicator:
    """Drop near-duplicate images via perceptual (pHash) hashing.

    Each accepted image's hash is remembered; later images within
    `threshold` Hamming-distance bits of any remembered hash are rejected.
    """

    def __init__(self, hash_size: int = 8, threshold: int = 5):
        self.hash_size = hash_size
        self.threshold = threshold
        # path string → perceptual hash of every image accepted so far
        self.hashes: dict[str, "imagehash.ImageHash"] = {}

    def is_duplicate(self, image_path: Path) -> bool:
        """Return True for near-duplicates or unreadable files; register otherwise."""
        try:
            candidate = imagehash.phash(
                Image.open(image_path).convert("RGB"), hash_size=self.hash_size
            )
            if any(
                abs(candidate - seen) <= self.threshold
                for seen in self.hashes.values()
            ):
                return True
            self.hashes[str(image_path)] = candidate
            return False
        except Exception:
            return True  # Can't hash → treat as duplicate
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
class GPUHasher:
    """
    GPU-accelerated Perceptual Hashing (pHash).
    Strictly forces GPU usage.

    NOTE(review): hashes from this class are not bit-compatible with
    ``imagehash.phash`` (unnormalised DCT basis, DC term kept — see
    compute_hashes), so only compare them against each other, never
    against Deduplicator's hashes.
    """
    def __init__(self, device="cuda"):
        # Fail fast instead of silently degrading to a slow CPU path.
        if not torch.cuda.is_available():
            raise RuntimeError("❌ CUDA is not available! GPUHasher requires a GPU.")

        self.device = device
        logger.info(f"⚡ GPUHasher initialized on: {str(self.device).upper()}")
        # 32x32 DCT basis, built once on CPU and cached on the GPU.
        self.dct_matrix = self._get_dct_matrix(32).to(self.device)

    def _get_dct_matrix(self, N):
        """Unnormalised DCT-II basis matrix (no orthonormal scale factors).

        Sufficient here because each hash bit is thresholded against the
        per-image median, which is invariant to a uniform scaling.
        """
        dct_m = np.zeros((N, N))
        for k in range(N):
            for n in range(N):
                dct_m[k, n] = np.cos(np.pi / N * (n + 0.5) * k)
        return torch.from_numpy(dct_m).float()

    def compute_hashes(self, image_paths: list[Path], batch_size=64) -> dict[str, int]:
        """
        Compute pHash for a list of image paths using GPU acceleration.
        Returns dictionary {path_str: hash_int}

        Unreadable images are skipped silently; a failed GPU batch is
        logged at DEBUG level and dropped, so the result may contain
        fewer entries than ``image_paths``.
        """
        results = {}

        # Use tqdm for progress bar
        with tqdm(total=len(image_paths), desc=" Computing hashes (GPU)", unit="img") as pbar:
            for i in range(0, len(image_paths), batch_size):
                batch_paths = image_paths[i : i + batch_size]
                batch_tensors = []
                valid_paths = []

                for p in batch_paths:
                    try:
                        # Open (L = grayscale)
                        # We avoid PIL.resize here to save CPU
                        img = Image.open(p).convert("L")

                        # Convert to tensor [1, H, W] directly
                        t = torch.from_numpy(np.array(img)).float().unsqueeze(0) / 255.0
                        batch_tensors.append(t)
                        valid_paths.append(str(p))
                    except Exception:
                        pass  # unreadable file — skip it

                # Update pbar for the batch processed
                pbar.update(len(batch_paths))

                if not batch_tensors:
                    continue

                # GPU Processing
                try:
                    gpu_tensors = []
                    for t in batch_tensors:
                        # Move to GPU
                        t_gpu = t.to(self.device, non_blocking=True).unsqueeze(0)  # [1, 1, H, W]
                        # Resize on GPU
                        t_resized = F.interpolate(t_gpu, size=(32, 32), mode='bilinear', align_corners=False)
                        gpu_tensors.append(t_resized.squeeze(0))  # [1, 32, 32]

                    # Stack: [B, 32, 32]
                    pixel_batch = torch.stack(gpu_tensors).squeeze(1)

                    # Compute DCT: D * I * D^T
                    # [32, 32] @ [B, 32, 32] @ [32, 32] -> [B, 32, 32]
                    dct = torch.matmul(self.dct_matrix, pixel_batch)
                    dct = torch.matmul(dct, self.dct_matrix.T)

                    # Keep the low-frequency top-left 8x8 block, flattened to [B, 64].
                    # NOTE(review): unlike classic pHash, the DC term at (0,0) IS
                    # included in this block and in the median below.
                    dct_low = dct[:, :8, :8].reshape(-1, 64)

                    # Compute median per image
                    medians = dct_low.median(dim=1, keepdim=True).values

                    # Generate hash: 1 if > median, 0 otherwise
                    bits = (dct_low > medians).long()

                    # Convert 64 bits to integer
                    # Powers of 2 vector: [2^0, 2^1, ... 2^63]
                    # NOTE(review): 2**63 overflows signed int64; consistent
                    # within one process, but verify before persisting hashes.
                    powers = (2 ** torch.arange(64, device=self.device)).long()
                    hashes = (bits * powers).sum(dim=1).cpu().numpy()

                    for p, h in zip(valid_paths, hashes):
                        results[p] = int(h)

                except Exception as e:
                    logger.debug(f"GPU Hash batch failed: {e}")
                    continue

        return results
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 311 |
+
# Main Pipeline
|
| 312 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 313 |
+
|
| 314 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 315 |
+
# Main Pipeline
|
| 316 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 317 |
+
def run_quality_filter(config: dict) -> dict:
    """Main quality filter pipeline (GPU-accelerated) with Auto-Scrape Top-Up.

    For every theme directory under the raw data root, copies images that pass
    quality + deduplication checks into the processed tree until each category
    holds TARGET_COUNT images. When raw candidates run out, the Pinterest
    scraper is invoked to fetch more, and the loop rescans.

    Args:
        config: Parsed config.yaml dict; reads paths.data.raw/processed and
            dataset.quality thresholds.

    Returns:
        Dict mapping category name -> {"total", "passed", "failed", "duplicates"}.

    Side effects: copies files into the processed tree, may launch a Selenium
    scraper, and calls sys.exit(1) if the raw directory is missing.
    """
    from pinterest_scraper import PinterestScraper, DEFAULT_QUERIES  # Lazy import to avoid circular deps

    raw_dir = Path(config["paths"]["data"]["raw"])
    processed_dir = Path(config["paths"]["data"]["processed"])

    # Per-category quota of clean images in the processed tree.
    TARGET_COUNT = 1300

    if not raw_dir.exists():
        logger.error(f"Raw data directory does not exist: {raw_dir}")
        sys.exit(1)

    # Quality settings from config (fall back to library defaults when absent).
    quality_cfg = config.get("dataset", {}).get("quality", {})

    # NOTE(review): constructed with keyword thresholds here, but
    # targeted_filter.py calls ImageQualityChecker(config) positionally —
    # one of the two call sites likely mismatches the class __init__; confirm.
    checker = ImageQualityChecker(
        min_resolution=quality_cfg.get("min_resolution", 512),
        min_sharpness=quality_cfg.get("min_sharpness", 50.0),
        min_aspect_ratio=quality_cfg.get("min_aspect_ratio", 0.4),
        max_aspect_ratio=quality_cfg.get("max_aspect_ratio", 2.5),
    )
    dedup = Deduplicator()

    # Initialize scraper (but don't start driver yet)
    scraper = PinterestScraper(config, str(raw_dir))

    # Log GPU status
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        gpu_mem = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        logger.info(f"🎮 GPU detected: {gpu_name}. Total memory: {gpu_mem:.2f} GB")
    else:
        logger.info("🖥️ No GPU detected — running on CPU (slower)")

    # Per-category counters; defaultdict so new categories start zeroed.
    stats = defaultdict(lambda: {"total": 0, "passed": 0, "failed": 0, "duplicates": 0})

    # 1. LOAD ALL EXISTING PROCESSED IMAGES (Global Deduplication)
    # Pre-seeding the dedup hash set with everything already accepted ensures
    # new candidates are compared against the *entire* dataset, not just this run.
    logger.info("🧠 Learning ALL existing images to prevent duplicates...")
    all_processed_files = []
    for root, _, files in os.walk(processed_dir):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.webp')):
                all_processed_files.append(Path(root) / file)

    existing_hashes = 0
    if all_processed_files:
        hasher = GPUHasher()
        # Compute hashes for everything currently in processed
        batch_hashes = hasher.compute_hashes(all_processed_files, batch_size=128)
        dedup.hashes.update(batch_hashes)
        existing_hashes = len(batch_hashes)

    logger.info(f"✅ Memorized {existing_hashes} unique images in processed dataset.")

    # Collect all directories under raw (every subdirectory is treated as a
    # theme to process; the raw root itself is skipped).
    leaf_dirs = []
    for root, dirs, files in os.walk(raw_dir):
        root_path = Path(root)
        # A directory may currently be empty (scraped before) — we still keep
        # it so the top-up loop can trigger a scrape for it.
        rel_path = root_path.relative_to(raw_dir)

        # Skip the root directory itself (files directly in data/raw)
        if str(rel_path) == ".":
            continue

        leaf_dirs.append((rel_path, root_path))

    if not leaf_dirs:
        logger.warning("No directories found in raw data.")
        return {}

    logger.info(f"Found {len(leaf_dirs)} theme directories to process")

    for rel_path, dir_path in sorted(leaf_dirs):
        category = str(rel_path).replace("\\", "/")
        out_dir = processed_dir / rel_path
        out_dir.mkdir(parents=True, exist_ok=True)

        # Loop: filter available raw images; if still short, scrape more and
        # rescan. Exits when the quota is met or the scraper fails.
        while True:
            # Check current status in processed folder
            processed_images = [f for f in os.listdir(out_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
            current_count = len(processed_images)

            # If we met the target, break loop and move to next category
            if current_count >= TARGET_COUNT:
                logger.info(f"✅ {category}: Target met ({current_count} images).")
                break

            needed = TARGET_COUNT - current_count
            logger.info(f"\nCategory: {category}")
            logger.info(f" Current: {current_count} | Needed: {needed}")

            # Get raw images
            raw_images = sorted([
                dir_path / f for f in os.listdir(dir_path)
                if f.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.bmp'))
            ])
            logger.info(f" Raw images available: {len(raw_images)}")

            # Identify candidates (raw images NOT yet in processed folder by filename)
            existing_filenames = set(processed_images)
            candidates = [p for p in raw_images if p.name not in existing_filenames]

            added_this_round = 0

            if candidates:
                logger.info(f" Processing {len(candidates)} new candidates...")
                pbar = tqdm(candidates, desc=f" {category} (Filter)", unit="img")
                for img_path in pbar:
                    # Stop early once the quota shortfall is covered.
                    if added_this_round >= needed:
                        break

                    stats[category]["total"] += 1

                    # Quality check (GPU-accelerated sharpness + color)
                    passed, metrics = checker.check(img_path)
                    if not passed:
                        stats[category]["failed"] += 1
                        continue

                    # Dedup check (hash-based, against the global hash set)
                    if dedup.is_duplicate(img_path):
                        stats[category]["duplicates"] += 1
                        continue

                    # Accepted: copy into the processed tree.
                    dest = out_dir / img_path.name
                    shutil.copy2(img_path, dest)
                    stats[category]["passed"] += 1
                    added_this_round += 1

                pbar.close()
            current_count += added_this_round

            if current_count >= TARGET_COUNT:
                continue  # Re-evaluate loop condition (which will break)

            # If still short, trigger scraper
            needed = TARGET_COUNT - current_count
            if needed > 0:
                logger.warning(f" ⚠️ Short by {needed} images! Launching Scraper to fetch more...")

                # Fetch query list; fall back to generic queries built from the
                # leaf theme name when the category has no curated queries.
                queries = DEFAULT_QUERIES.get(category)
                if not queries:
                    theme = category.split("/")[-1]
                    queries = [f"{theme} poster", f"{theme} design", f"{theme} advertisement"]

                # Scrape 2x what we need (filter/dedup rejects roughly half).
                scrape_target = len(raw_images) + (needed * 2)
                # Ensure we at least target 2800 if we are really low
                scrape_target = max(scrape_target, 2800)

                scraper.TARGET_PER_THEME = scrape_target
                logger.info(f" 🕷️ Scraping target set to {scrape_target} for {category}...")

                try:
                    # scraper.scrape_category downloads to raw_dir/{category}
                    # and returns the total downloaded count.
                    new_total = scraper.scrape_category(category, queries)
                    logger.info(f" ✅ Scraping finished. Raw total is now {new_total}. Rescanning...")
                except Exception as e:
                    logger.error(f" ❌ Scraper failed: {e}")
                    break  # Stop trying for this category if scraper fails
            else:
                break  # Should be caught by top check, but safe fallback

    # Clear GPU memory used by hashing/quality checks before returning.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return dict(stats)
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
def print_summary(stats: dict):
    """Render the per-category filter results as a fixed-width console table."""
    divider = "-" * 60
    lines = [
        "\n" + "=" * 60,
        f"{'Category':<35} | {'Total':<8} | {'Pass':<6} | {'Fail':<6} | {'Dupes':<6}",
        divider,
    ]

    grand_passed = 0
    for name in sorted(stats):
        entry = stats[name]
        lines.append(
            f"{name:<35} | {entry['total']:<8} | {entry['passed']:<6} | "
            f"{entry['failed']:<6} | {entry['duplicates']:<6}"
        )
        grand_passed += entry['passed']

    lines.append(divider)
    lines.append(f"Total High-Quality Images: {grand_passed}")
    lines.append("=" * 60 + "\n")
    print("\n".join(lines))
|
| 516 |
+
|
| 517 |
+
|
| 518 |
+
if __name__ == "__main__":
    # NOTE(review): this module contains TWO `if __name__ == "__main__"` guards.
    # A second guard further down calls main(), which runs run_quality_filter()
    # again — so executing this script runs the whole pipeline (including
    # scraping and file copies) twice. Consolidate into a single entry point.
    parser = argparse.ArgumentParser(description="Run Quality Filter with Auto-Scrape")
    parser.add_argument("--config", default="configs/config.yaml", help="Path to config.yaml")
    args = parser.parse_args()

    config = load_config(args.config)

    # Run pipeline
    stats = run_quality_filter(config)
    print_summary(stats)

    # Detailed per-category summary with pass rates, emitted via the logger.
    logger.info("\n" + "=" * 80)
    logger.info("QUALITY FILTER SUMMARY")
    logger.info("=" * 80)
    logger.info(f" {'Category':35s} {'Total':>7s} {'Passed':>7s} {'Failed':>7s} {'Dupes':>7s} {'Rate':>7s}")
    logger.info(f" {'-'*35} {'-'*7} {'-'*7} {'-'*7} {'-'*7} {'-'*7}")

    grand_total = grand_passed = 0
    for cat, s in sorted(stats.items()):
        # Pass rate per category; max(...,1) guards against division by zero.
        rate = f"{s['passed']/max(s['total'],1)*100:.1f}%"
        logger.info(
            f" {cat:35s} {s['total']:7d} {s['passed']:7d} "
            f"{s['failed']:7d} {s['duplicates']:7d} {rate:>7s}"
        )
        grand_total += s["total"]
        grand_passed += s["passed"]

    rate = f"{grand_passed/max(grand_total,1)*100:.1f}%"
    logger.info(f" {'-'*35} {'-'*7} {'-'*7} {'-'*7} {'-'*7} {'-'*7}")
    logger.info(f" {'TOTAL':35s} {grand_total:7d} {grand_passed:7d}{'':>17s} {rate:>7s}")
    logger.info("=" * 80)
|
| 549 |
+
|
| 550 |
+
|
| 551 |
+
def main():
    """CLI entry point: parse arguments, run the filter pipeline, print results."""
    arg_parser = argparse.ArgumentParser(description="Image Quality Filter (GPU-Accelerated)")
    arg_parser.add_argument("--config", default="configs/config.yaml", help="Path to config.yaml")
    cli_args = arg_parser.parse_args()

    cfg = load_config(cli_args.config)
    results = run_quality_filter(cfg)
    print_summary(results)
|
| 559 |
+
|
| 560 |
+
|
| 561 |
+
if __name__ == "__main__":
    # NOTE(review): duplicate entry point — an earlier `if __name__ == "__main__"`
    # block above already runs the full pipeline at import-as-script time, so
    # this guard causes run_quality_filter() to execute a second time.
    # Keep exactly one of the two guards.
    main()
|
scripts/split_dataset.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os
|
| 3 |
+
import shutil
|
| 4 |
+
import random
|
| 5 |
+
import logging
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
# Configure logging
|
| 9 |
+
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S"  # time-only timestamps keep console output compact
)
logger = logging.getLogger(__name__)

# Constants
TARGET_PER_CATEGORY = 1000  # max images drawn from each category before splitting
SPLIT_RATIO = (0.8, 0.1, 0.1)  # Train, Val, Test

# Directory layout: splits are siblings of the processed source tree.
DATA_ROOT = Path("data")
PROCESSED_DIR = DATA_ROOT / "processed"  # input: filtered/deduplicated images
TRAIN_DIR = DATA_ROOT / "train"
VAL_DIR = DATA_ROOT / "val"
TEST_DIR = DATA_ROOT / "test"
|
| 25 |
+
|
| 26 |
+
def get_image_files(directory):
    """Walk *directory* recursively and return every image file found."""
    allowed = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'}
    found = []
    for candidate in directory.rglob("*"):
        if candidate.is_file() and candidate.suffix.lower() in allowed:
            found.append(candidate)
    return found
|
| 30 |
+
|
| 31 |
+
def clear_directory(path):
    """Remove *path* and everything beneath it, if it is present on disk."""
    if not path.exists():
        return
    logger.warning(f"Deleting existing directory: {path}")
    shutil.rmtree(path)
|
| 36 |
+
|
| 37 |
+
def main():
    """Rebuild train/val/test splits from data/processed.

    For every leaf category directory under PROCESSED_DIR, randomly selects up
    to TARGET_PER_CATEGORY images and copies them (plus any sidecar ``.txt``
    caption files) into TRAIN_DIR / VAL_DIR / TEST_DIR according to
    SPLIT_RATIO. Any existing split directories are wiped first.
    """
    logger.info("🚀 Starting Dataset Resplit (v2)")
    logger.info(f"🎯 Target: {TARGET_PER_CATEGORY} images/category | Split: {SPLIT_RATIO}")

    # 1. Clear existing splits so stale files never leak between runs.
    clear_directory(TRAIN_DIR)
    clear_directory(VAL_DIR)
    clear_directory(TEST_DIR)

    TRAIN_DIR.mkdir(parents=True, exist_ok=True)
    VAL_DIR.mkdir(parents=True, exist_ok=True)
    TEST_DIR.mkdir(parents=True, exist_ok=True)

    # 2. Find category folders: any directory directly containing image files
    # (non-recursive check per directory; os.walk handles the recursion).
    categories = []
    for root, dirs, files in os.walk(PROCESSED_DIR):
        current_path = Path(root)
        local_images = [
            current_path / f
            for f in files
            if Path(f).suffix.lower() in {'.jpg', '.jpeg', '.png', '.webp', '.bmp'}
        ]
        if local_images:
            rel_path = current_path.relative_to(PROCESSED_DIR)
            categories.append((rel_path, local_images))

    if not categories:
        logger.error("❌ No categories found in data/processed!")
        return

    logger.info(f"📂 Found {len(categories)} categories to process.")

    for rel_path, images in categories:
        category_name = str(rel_path).replace("\\", "/")
        logger.info(f"\n🔹 Processing: {category_name}")

        # Shuffle, then cap at the per-category target.
        # NOTE: random.shuffle is unseeded, so splits are NOT reproducible
        # across runs; seed the RNG if deterministic splits are required.
        random.shuffle(images)
        selected_images = images[:TARGET_PER_CATEGORY]
        count = len(selected_images)

        if count < TARGET_PER_CATEGORY:
            logger.warning(f" ⚠️ Only found {count} images (Target: {TARGET_PER_CATEGORY})")
        else:
            # FIX: report the actual selection count instead of a hard-coded "1000".
            logger.info(f" ✅ Selected {count} images from {len(images)} available.")

        # Split sizes; the test set takes the remainder so the three parts
        # always sum exactly to `count`.
        n_train = int(count * SPLIT_RATIO[0])
        n_val = int(count * SPLIT_RATIO[1])

        train_set = selected_images[:n_train]
        val_set = selected_images[n_train : n_train + n_val]
        test_set = selected_images[n_train + n_val :]

        logger.info(f" Splitting: Train={len(train_set)}, Val={len(val_set)}, Test={len(test_set)}")

        # Copy images (and sidecar caption files) into each split directory,
        # preserving the category's relative path.
        for dataset, split_name, dest_root in [
            (train_set, "Train", TRAIN_DIR),
            (val_set, "Val", VAL_DIR),
            (test_set, "Test", TEST_DIR)
        ]:
            if not dataset:
                continue

            dest_category_dir = dest_root / rel_path
            dest_category_dir.mkdir(parents=True, exist_ok=True)

            for img_path in dataset:
                try:
                    shutil.copy2(img_path, dest_category_dir / img_path.name)
                    # Carry the caption text file along when one exists.
                    txt_path = img_path.with_suffix(".txt")
                    if txt_path.exists():
                        shutil.copy2(txt_path, dest_category_dir / txt_path.name)
                except Exception as e:
                    # Log and keep going: one bad file must not abort the split.
                    logger.error(f"Failed to copy {img_path.name}: {e}")

    logger.info("\n🎉 Resplit Complete.")

    # Verification stats: count images actually landed in each split.
    logger.info("📊 Final Counts:")
    for d, name in [(TRAIN_DIR, "TRAIN"), (VAL_DIR, "VAL"), (TEST_DIR, "TEST")]:
        img_count = len(get_image_files(d))
        logger.info(f" {name}: {img_count} images")
|
| 133 |
+
|
| 134 |
+
if __name__ == "__main__":
|
| 135 |
+
main()
|
| 136 |
+
|
scripts/targeted_filter.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import shutil
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
import time
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
from PIL import Image
|
| 9 |
+
import torch
|
| 10 |
+
|
| 11 |
+
# Add current directory to path so we can import sibling scripts
|
| 12 |
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 13 |
+
|
| 14 |
+
from quality_filter import ImageQualityChecker, Deduplicator, GPUHasher, load_config
|
| 15 |
+
|
| 16 |
+
# Configure logging
|
| 17 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Minimum number of processed images each category must reach before it is
# considered complete (matches TARGET_COUNT in quality_filter.run_quality_filter).
TARGET_COUNT = 1300
|
| 21 |
+
|
| 22 |
+
def main():
    """Top up under-populated processed categories from already-scraped raw images.

    Pass 1 memorizes hashes of every image already in data/processed (global
    dedup). Pass 2 finds leaf raw categories whose processed counterpart holds
    fewer than TARGET_COUNT images. Pass 3 copies raw candidates that pass the
    quality check and are not duplicates, until the shortfall is covered.
    """
    logger.info("🚀 Starting Targeted Top-Up Filter (v2)")
    logger.info(f"🎯 Goal: Ensure every category has >= {TARGET_COUNT} unique, high-quality images")

    # Load config.yaml from the repository's configs/ directory.
    config_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "configs", "config.yaml")
    config = load_config(config_path)

    raw_dir = Path(config["paths"]["data"]["raw"])
    processed_dir = Path(config["paths"]["data"]["processed"])

    # Initialize checkers.
    # NOTE(review): ImageQualityChecker is constructed here with the whole
    # config dict, while quality_filter.run_quality_filter passes individual
    # keyword thresholds — one of the two call sites likely mismatches the
    # class __init__; confirm against the class definition.
    checker = ImageQualityChecker(config)
    dedup = Deduplicator()

    if torch.cuda.is_available():
        logger.info(f"⚡ Using GPU: {torch.cuda.get_device_name(0)}")

    # 1. LOAD ALL EXISTING PROCESSED IMAGES (Global Deduplication)
    logger.info("🧠 Learning ALL existing images to prevent duplicates...")
    all_processed_files = []
    for root, _, files in os.walk(processed_dir):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.webp')):
                all_processed_files.append(Path(root) / file)

    existing_hashes = 0
    if all_processed_files:
        hasher = GPUHasher()
        # Compute hashes for everything currently in processed
        batch_hashes = hasher.compute_hashes(all_processed_files, batch_size=128)
        dedup.hashes.update(batch_hashes)
        existing_hashes = len(batch_hashes)

    logger.info(f"✅ Memorized {existing_hashes} unique images in processed dataset.")

    # 2. IDENTIFY CATEGORIES NEEDING TOP-UP
    # A raw directory with no subdirectories is treated as a leaf category.
    categories_to_process = []
    for root, dirs, files in os.walk(raw_dir):
        if not dirs:  # Leaf node
            rel_path = Path(root).relative_to(raw_dir)
            proc_path = processed_dir / rel_path

            # Count images already accepted into processed for this category.
            if proc_path.exists():
                curr_count = len([f for f in os.listdir(proc_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))])
            else:
                curr_count = 0

            if curr_count < TARGET_COUNT:
                categories_to_process.append((rel_path, Path(root), proc_path, curr_count))
            else:
                pass  # Already meets target

    if not categories_to_process:
        logger.info("✨ All categories meet the target of 1300! No work needed.")
        return

    logger.info(f"📋 Found {len(categories_to_process)} categories below target.")

    # 3. PROCESS MISSING CATEGORIES
    for rel_path, raw_category_path, proc_category_path, current_count in categories_to_process:
        needed = TARGET_COUNT - current_count
        category_name = str(rel_path).replace("\\", "/")

        logger.info(f"\n🔸 Processing: {category_name}")
        logger.info(f" Current: {current_count} | Needed: {needed}")

        proc_category_path.mkdir(parents=True, exist_ok=True)

        # All raw files for the category, sorted so runs are deterministic.
        raw_files = sorted([
            raw_category_path / f
            for f in os.listdir(raw_category_path)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        ])

        added = 0
        skipped_dupe = 0
        skipped_quality = 0

        # Cheap pre-filter: skip raw files whose exact filename already exists
        # in the processed folder (hash-based dedup still catches the rest).
        existing_filenames = set(os.listdir(proc_category_path))
        candidates = [f for f in raw_files if f.name not in existing_filenames]

        if not candidates:
            logger.warning(" ❌ No new raw files available to scan!")
            continue

        # Progress bar sized by the shortfall, not the candidate count.
        pbar = tqdm(total=needed, desc=f" Filling {category_name}", unit="img")

        # Sequential per-image loop: simple and lets us stop the moment the
        # quota is filled (batch hashing would be faster but complicates that).
        for raw_img_path in candidates:
            if added >= needed:
                break

            try:
                # 1. Quality Check (GPU)
                passed, metrics = checker.check(raw_img_path)
                if not passed:
                    skipped_quality += 1
                    continue

                # 2. Dedup Check (perceptual hash against the global set)
                if dedup.is_duplicate(raw_img_path):
                    skipped_dupe += 1
                    continue

                # 3. Accepted: copy into the processed category folder.
                shutil.copy2(raw_img_path, proc_category_path / raw_img_path.name)
                added += 1
                pbar.update(1)

            except Exception as e:
                # One unreadable file must not abort the category.
                logger.error(f"Error processing {raw_img_path}: {e}")
                continue

        pbar.close()

        final_count = current_count + added
        if final_count >= TARGET_COUNT:
            logger.info(f" ✅ Reached target! ({final_count})")
        else:
            logger.warning(f" ⚠️ Finished scanning raw files. Ended with {final_count} (Still short by {TARGET_COUNT - final_count})")

    logger.info("\n🎉 Top-Up Complete!")
|
| 164 |
+
|
| 165 |
+
if __name__ == "__main__":
|
| 166 |
+
main()
|
| 167 |
+
|
scripts/targeted_scraper.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import sys
|
| 3 |
+
import os
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
# Add current directory to path so we can import sibling scripts
|
| 7 |
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 8 |
+
|
| 9 |
+
from pinterest_scraper import PinterestScraper, load_config, DEFAULT_QUERIES
|
| 10 |
+
|
| 11 |
+
# Configure logging
|
| 12 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# List of categories that need more images (Target: 2800 raw to get ~1300 clean)
TARGET_CATEGORIES = [
    "workshops/design",
    "workshops/coding",
    "workshops/business",
    "tech_fest/hackathon",
    "tech_fest/general",
    "tech_fest/coding_competition",
    "tech_fest/web_app_dev",
    "tech_fest/cybersecurity",
    "festivals/navratri_garba",
    "sports/general"
]

# Raw images to scrape per category; the downstream quality filter is expected
# to keep roughly half, yielding ~1300 usable images.
TARGET_COUNT = 2800
|
| 30 |
+
|
| 31 |
+
def main():
    """Scrape every listed under-populated category up to TARGET_COUNT raw images."""
    logger.info("🚀 Starting Targeted Scraper for Low-Data Categories")
    logger.info(f"🎯 Target Count: {TARGET_COUNT} images per category")

    # Resolve configs/config.yaml relative to the repository root.
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    cfg = load_config(os.path.join(repo_root, "configs", "config.yaml"))

    downloads_root = cfg["paths"]["data"]["raw"]
    crawler = PinterestScraper(cfg, downloads_root)
    crawler.TARGET_PER_THEME = TARGET_COUNT  # raise the per-theme download ceiling

    banner = "============================================================"
    for slug in TARGET_CATEGORIES:
        logger.info(f"\n{banner}")
        logger.info(f"Processing: {slug}")
        logger.info(f"{banner}")

        # Curated search queries for the category, with a generic fallback
        # derived from the leaf theme name.
        search_terms = DEFAULT_QUERIES.get(slug)
        if not search_terms:
            logger.warning(f"⚠️ No specific queries found for {slug}, generating generic ones.")
            topic = slug.split("/")[-1]
            search_terms = [f"{topic} poster design", f"{topic} event flyer", f"creative {topic} poster"]

        try:
            downloaded = crawler.scrape_category(slug, search_terms)
            logger.info(f"✅ Finished {slug}: {downloaded} total images")
        except Exception as err:
            logger.error(f"❌ Failed processing {slug}: {err}")

        time.sleep(2)  # brief pause between categories

    logger.info("\n🎉 All targeted categories processed!")
|
| 70 |
+
|
| 71 |
+
if __name__ == "__main__":
|
| 72 |
+
main()
|
scripts/test_checkpoint.py
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
test_checkpoint.py
|
| 4 |
+
==================
|
| 5 |
+
Two-Stage Poster Generation Pipeline — SDXL Artwork + PIL Typography
|
| 6 |
+
|
| 7 |
+
Stage 1 Generate pure visual artwork with SDXL + Campus AI LoRA.
|
| 8 |
+
Prompts describe ONLY visual atmosphere — zero text references.
|
| 9 |
+
guidance_scale=7.5 ensures the negative prompt suppresses all
|
| 10 |
+
hallucinated text/watermarks from the diffusion output.
|
| 11 |
+
|
| 12 |
+
Stage 2 PIL Compositor overlays pixel-perfect typography on the raw artwork.
|
| 13 |
+
|
| 14 |
+
Usage:
|
| 15 |
+
python test_checkpoint.py
|
| 16 |
+
|
| 17 |
+
Outputs in output/test_generations/:
|
| 18 |
+
<slug>_artwork.png — raw SDXL output, no text
|
| 19 |
+
<slug>_poster.png — final composited poster
|
| 20 |
+
|
| 21 |
+
Per-poster controls:
|
| 22 |
+
text_position "top" | "center" | "bottom" | "auto"
|
| 23 |
+
Set based on where the artwork has clean negative space.
|
| 24 |
+
scrim True for dark/busy artworks — adds contrast under text.
|
| 25 |
+
False for vivid/bright artworks — keep colours untouched.
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
from __future__ import annotations
|
| 29 |
+
|
| 30 |
+
import os
|
| 31 |
+
import sys
|
| 32 |
+
|
| 33 |
+
import torch
|
| 34 |
+
from pathlib import Path
|
| 35 |
+
|
| 36 |
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 37 |
+
from poster_compositor import composite_poster, ensure_fonts
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# ---------------------------------------------------------------------------
|
| 41 |
+
# Shared negative prompt
|
| 42 |
+
# ---------------------------------------------------------------------------
|
| 43 |
+
# Explicitly blocks ALL forms of text/typography from the raw artwork.
|
| 44 |
+
# garbled_text and illegible_text added specifically to kill LoRA artefacts
|
| 45 |
+
# like BOMIELLOOOKD / OULSTECS seen in previous generations.
|
| 46 |
+
|
| 47 |
+
# Negative prompt shared by every generation in this script.
# First half suppresses any rendered text/typography (including the garbled
# pseudo-words the LoRA sometimes hallucinates); second half blocks common
# SDXL quality failure modes.
_NEG = (
    "text, words, letters, typography, fonts, captions, labels, watermark, "
    "signature, logo, banner, title, heading, writing, written text, "
    "illegible text, garbled text, gibberish text, distorted words, "
    "random letters, fake words, blurry, low quality, deformed, ugly, "
    "disfigured, oversaturated, bad anatomy, cropped, out of frame"
)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# ---------------------------------------------------------------------------
|
| 57 |
+
# Poster definitions
|
| 58 |
+
# ---------------------------------------------------------------------------
|
| 59 |
+
|
| 60 |
+
POSTERS: list[tuple[str, str, dict]] = [
|
| 61 |
+
|
| 62 |
+
# ── Freshers Party ──────────────────────────────────────────────────────
|
| 63 |
+
(
|
| 64 |
+
"freshers_party",
|
| 65 |
+
|
| 66 |
+
"campus_ai_poster Vibrant freshers welcome party background. "
|
| 67 |
+
"Confetti explosion in electric blue and neon purple raining from above. "
|
| 68 |
+
"Disco ball casting prismatic reflections across a dark concert stage. "
|
| 69 |
+
"Bokeh light circles in hot pink and cyan filling the frame. "
|
| 70 |
+
"Bollywood dance-floor energy with glitter dust in a single spotlight beam. "
|
| 71 |
+
"Shallow depth of field, cinematic wide-angle composition. "
|
| 72 |
+
"No text, no signs, no banners anywhere in the scene.",
|
| 73 |
+
|
| 74 |
+
dict(
|
| 75 |
+
title = "Freshers Bash 2026",
|
| 76 |
+
subtitle = "Welcome to the Jungle, First Years!",
|
| 77 |
+
date = "August 22, 2026 • 6 PM Onwards",
|
| 78 |
+
venue = "Open Air Theatre, DTU",
|
| 79 |
+
organizer = "Student Council 2026–27",
|
| 80 |
+
accent_color = "#E040FB",
|
| 81 |
+
style = "bold",
|
| 82 |
+
text_position = "bottom",
|
| 83 |
+
scrim = True,
|
| 84 |
+
),
|
| 85 |
+
),
|
| 86 |
+
|
| 87 |
+
# ── Navratri Garba ──────────────────────────────────────────────────────
|
| 88 |
+
(
|
| 89 |
+
"navratri_garba",
|
| 90 |
+
|
| 91 |
+
"campus_ai_poster Stunning Navratri Garba night celebration background. "
|
| 92 |
+
"Swirling dandiya sticks and ghagra choli silhouettes mid-spin viewed from above. "
|
| 93 |
+
"Warm saffron, deep crimson, and gold falling flower petals. "
|
| 94 |
+
"Intricate mirror-work embroidery and marigold garland borders framing the scene. "
|
| 95 |
+
"Glowing earthen diyas reflecting off a polished stone floor. "
|
| 96 |
+
"Rich festive atmosphere, painterly detail, vibrant colour contrast. "
|
| 97 |
+
"No text, no signs, no labels anywhere in the scene.",
|
| 98 |
+
|
| 99 |
+
dict(
|
| 100 |
+
title = "Garba Raas Night",
|
| 101 |
+
subtitle = "Nine Nights of Dandiya & Dance",
|
| 102 |
+
date = "October 2–10, 2026",
|
| 103 |
+
venue = "College Ground, SVNIT Surat",
|
| 104 |
+
organizer = "Gujarat Cultural Committee",
|
| 105 |
+
accent_color = "#FF6F00",
|
| 106 |
+
style = "elegant",
|
| 107 |
+
text_position = "bottom",
|
| 108 |
+
scrim = False,
|
| 109 |
+
),
|
| 110 |
+
),
|
| 111 |
+
|
| 112 |
+
# ── Coding Hackathon ────────────────────────────────────────────────────
|
| 113 |
+
(
|
| 114 |
+
"coding_hackathon",
|
| 115 |
+
|
| 116 |
+
"campus_ai_poster Dark futuristic hackathon coding environment background. "
|
| 117 |
+
"Multiple holographic screens floating in 3-D space with scrolling green "
|
| 118 |
+
"terminal animations and binary rain patterns. "
|
| 119 |
+
"Glowing cyan circuit-board traces on a deep black background. "
|
| 120 |
+
"Keyboard and laptop silhouettes lit from below by a cool blue glow. "
|
| 121 |
+
"High-contrast, ultra-sharp, cyberpunk aesthetic. "
|
| 122 |
+
"No text, no readable characters, no words anywhere in the scene.",
|
| 123 |
+
|
| 124 |
+
dict(
|
| 125 |
+
title = "Code-a-thon 4.0",
|
| 126 |
+
subtitle = "36 Hours. No Sleep. Pure Code.",
|
| 127 |
+
date = "January 18–19, 2026",
|
| 128 |
+
venue = "CS Lab 301, IIT Bombay",
|
| 129 |
+
organizer = "WnCC & DevClub",
|
| 130 |
+
accent_color = "#00E676",
|
| 131 |
+
style = "bold",
|
| 132 |
+
text_position = "bottom",
|
| 133 |
+
scrim = True,
|
| 134 |
+
),
|
| 135 |
+
),
|
| 136 |
+
|
| 137 |
+
# ── Blood Donation Camp ─────────────────────────────────────────────────
|
| 138 |
+
(
|
| 139 |
+
"blood_donation",
|
| 140 |
+
|
| 141 |
+
"campus_ai_poster Warm heartfelt blood donation awareness background. "
|
| 142 |
+
"A large red blood drop with a heartbeat ECG line running through its center. "
|
| 143 |
+
"Clean white and soft crimson minimalist medical composition. "
|
| 144 |
+
"Two open hands gently cupping the drop from below. "
|
| 145 |
+
"Gentle radial light bloom. Compassionate, hopeful healthcare aesthetic. "
|
| 146 |
+
"No text, no words, no labels in the scene.",
|
| 147 |
+
|
| 148 |
+
dict(
|
| 149 |
+
title = "Donate Blood, Save Lives",
|
| 150 |
+
subtitle = "NSS Blood Donation Camp",
|
| 151 |
+
date = "March 5, 2026 • 9 AM – 4 PM",
|
| 152 |
+
venue = "Health Centre, NIT Trichy",
|
| 153 |
+
organizer = "NSS Unit & Red Cross Society",
|
| 154 |
+
accent_color = "#D32F2F",
|
| 155 |
+
style = "modern",
|
| 156 |
+
text_position = "bottom",
|
| 157 |
+
scrim = False,
|
| 158 |
+
),
|
| 159 |
+
),
|
| 160 |
+
|
| 161 |
+
# ── Farewell ────────────────────────────────────────────────────────────
|
| 162 |
+
(
|
| 163 |
+
"farewell",
|
| 164 |
+
|
| 165 |
+
"campus_ai_poster Sentimental farewell celebration background. "
|
| 166 |
+
"Golden fairy lights strung across a twilight campus courtyard. "
|
| 167 |
+
"Graduation caps thrown upward against a warm amber-peach sunset sky. "
|
| 168 |
+
"Bokeh spheres in champagne gold and soft peach. "
|
| 169 |
+
"Petals falling slowly through the air from above. "
|
| 170 |
+
"Nostalgic, bittersweet, and celebratory mood. Warm film-grain texture. "
|
| 171 |
+
"No text, no banners, no words in the scene.",
|
| 172 |
+
|
| 173 |
+
dict(
|
| 174 |
+
title = "Alvida — Farewell 2026",
|
| 175 |
+
subtitle = "For the Batch That Made It Legendary",
|
| 176 |
+
date = "May 15, 2026 • 5 PM",
|
| 177 |
+
venue = "Main Auditorium, NSUT",
|
| 178 |
+
organizer = "Third Year Organizing Committee",
|
| 179 |
+
accent_color = "#FFD54F",
|
| 180 |
+
style = "elegant",
|
| 181 |
+
text_position = "bottom",
|
| 182 |
+
scrim = False,
|
| 183 |
+
),
|
| 184 |
+
),
|
| 185 |
+
|
| 186 |
+
# ── Annual Cultural Fest ─────────────────────────────────────────────────
|
| 187 |
+
(
|
| 188 |
+
"annual_fest",
|
| 189 |
+
|
| 190 |
+
"campus_ai_poster Epic grand annual college cultural fest background. "
|
| 191 |
+
"Massive paint-splash explosion in rainbow neon colours filling the entire frame. "
|
| 192 |
+
"Fireworks bursting above a packed outdoor main stage. "
|
| 193 |
+
"Laser beams sweeping over a roaring silhouette crowd. "
|
| 194 |
+
"Smoke machines and confetti cannons firing simultaneously. "
|
| 195 |
+
"Maximum energy, blockbuster festival scale, ultra-vivid colour grading. "
|
| 196 |
+
"Absolutely no text, no stage signs, no banners, no readable characters.",
|
| 197 |
+
|
| 198 |
+
dict(
|
| 199 |
+
title = "MOKSHA 2026",
|
| 200 |
+
subtitle = "The Biggest College Fest in India",
|
| 201 |
+
date = "February 14–16, 2026",
|
| 202 |
+
venue = "NSUT Main Campus, Dwarka",
|
| 203 |
+
organizer = "Moksha Organizing Committee",
|
| 204 |
+
accent_color = "#FF1744",
|
| 205 |
+
style = "bold",
|
| 206 |
+
text_position = "bottom",
|
| 207 |
+
scrim = True,
|
| 208 |
+
),
|
| 209 |
+
),
|
| 210 |
+
|
| 211 |
+
# ── Robotics Competition ─────────────────────────────────────────────────
|
| 212 |
+
(
|
| 213 |
+
"robotics_competition",
|
| 214 |
+
|
| 215 |
+
"campus_ai_poster Futuristic robotics competition arena background. "
|
| 216 |
+
"A sleek industrial robot arm mid-motion under dramatic blue-white spotlights. "
|
| 217 |
+
"Metallic gears, pistons, and carbon-fibre surface textures. "
|
| 218 |
+
"Electric sparks flying off welded joints. Dark smoke and industrial haze. "
|
| 219 |
+
"High-contrast dramatic lighting, mechanical precision aesthetic. "
|
| 220 |
+
"No text, no labels, no signage anywhere in the scene.",
|
| 221 |
+
|
| 222 |
+
dict(
|
| 223 |
+
title = "RoboWars 2026",
|
| 224 |
+
subtitle = "Build It. Break It. Win It.",
|
| 225 |
+
date = "March 22, 2026",
|
| 226 |
+
venue = "Innovation Hub, BITS Pilani",
|
| 227 |
+
organizer = "Robotics & Automation Society",
|
| 228 |
+
accent_color = "#40C4FF",
|
| 229 |
+
style = "modern",
|
| 230 |
+
text_position = "bottom",
|
| 231 |
+
scrim = True,
|
| 232 |
+
),
|
| 233 |
+
),
|
| 234 |
+
|
| 235 |
+
# ── Standup Comedy Night ─────────────────────────────────────────────────
|
| 236 |
+
(
|
| 237 |
+
"standup_comedy",
|
| 238 |
+
|
| 239 |
+
"campus_ai_poster Moody open-mic comedy night stage background. "
|
| 240 |
+
"Single golden spotlight cone hitting a lone microphone stand centre stage. "
|
| 241 |
+
"Deep maroon velvet curtains framing the wings on both sides. "
|
| 242 |
+
"Brick wall texture visible at the back — classic comedy club look. "
|
| 243 |
+
"Warm amber footlights and a faint laughing crowd silhouette at the bottom. "
|
| 244 |
+
"Intimate, atmospheric, slightly gritty feel. "
|
| 245 |
+
"No text, no words, no chalk board writing, no signs anywhere.",
|
| 246 |
+
|
| 247 |
+
dict(
|
| 248 |
+
title = "Laugh Riot 2026",
|
| 249 |
+
subtitle = "Open Mic Comedy Night",
|
| 250 |
+
date = "April 5, 2026 • 7 PM",
|
| 251 |
+
venue = "Black Box Theatre, Miranda House",
|
| 252 |
+
organizer = "The Comedy Collective",
|
| 253 |
+
accent_color = "#FFAB40",
|
| 254 |
+
style = "modern",
|
| 255 |
+
text_position = "top", # mic + spotlight fill center/bottom
|
| 256 |
+
scrim = True,
|
| 257 |
+
),
|
| 258 |
+
),
|
| 259 |
+
|
| 260 |
+
# ── Diwali Celebration ───────────────────────────────────────────────────
|
| 261 |
+
(
|
| 262 |
+
"diwali",
|
| 263 |
+
|
| 264 |
+
"campus_ai_poster Magical Diwali festival night background. "
|
| 265 |
+
"Hundreds of glowing earthen diyas arranged in concentric circles on dark stone. "
|
| 266 |
+
"Fireworks bursting in gold, silver, and emerald green overhead. "
|
| 267 |
+
"Intricate rangoli patterns in vibrant pink, blue, and orange surrounding the diyas. "
|
| 268 |
+
"Warm golden bokeh light spheres floating throughout. "
|
| 269 |
+
"Festive, divine, deeply traditional Indian atmosphere. "
|
| 270 |
+
"No text, no words, no labels anywhere in the scene.",
|
| 271 |
+
|
| 272 |
+
dict(
|
| 273 |
+
title = "Diwali Utsav 2026",
|
| 274 |
+
subtitle = "Festival of Lights on Campus",
|
| 275 |
+
date = "October 20, 2026 • 6 PM",
|
| 276 |
+
venue = "Central Lawn, IIT Delhi",
|
| 277 |
+
organizer = "Cultural Committee & NSS",
|
| 278 |
+
accent_color = "#FFD700",
|
| 279 |
+
style = "elegant",
|
| 280 |
+
text_position = "top", # rangoli / diyas fill bottom beautifully
|
| 281 |
+
scrim = False,
|
| 282 |
+
),
|
| 283 |
+
),
|
| 284 |
+
|
| 285 |
+
]
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
# ---------------------------------------------------------------------------
|
| 289 |
+
# Pipeline
|
| 290 |
+
# ---------------------------------------------------------------------------
|
| 291 |
+
|
| 292 |
+
def _load_pipeline(base_id: str, lora_dir: str, lora_file: str):
    """Load the SDXL text-to-image pipeline and attach the Campus AI LoRA.

    Args:
        base_id:   Hugging Face model id of the SDXL base checkpoint.
        lora_dir:  Directory containing the LoRA safetensors file.
        lora_file: Filename of the LoRA weights inside ``lora_dir``.

    Returns:
        A diffusers pipeline with the DPM++ 2M Karras scheduler configured
        and, when the weights file exists, the LoRA adapter active at full
        strength. Falls back to base SDXL (with a warning) if the LoRA file
        is missing.
    """
    from diffusers import AutoPipelineForText2Image, DPMSolverMultistepScheduler

    # Fix: the original unconditionally called .to("cuda") with fp16, which
    # crashes on CPU-only machines. Detect CUDA and fall back to CPU/fp32 so
    # the script can at least be smoke-tested without a GPU.
    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    dtype = torch.float16 if use_cuda else torch.float32

    print(" Loading SDXL base model ...")
    pipe = AutoPipelineForText2Image.from_pretrained(
        base_id,
        torch_dtype = dtype,
        variant = "fp16",        # fp16 repo weights; upcast to fp32 on CPU via torch_dtype
        use_safetensors = True,
    ).to(device)

    # DPM++ 2M Karras — sharper outputs, better prompt adherence than DDPM
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config,
        use_karras_sigmas = True,
    )

    lora_path = os.path.join(lora_dir, lora_file)
    if os.path.exists(lora_path):
        pipe.load_lora_weights(lora_dir, weight_name=lora_file, adapter_name="campus_poster")
        pipe.set_adapters(["campus_poster"], adapter_weights=[1.0])
        print(f" LoRA loaded → {lora_path}")
    else:
        print(f" WARNING: LoRA not found at {lora_path} — using base SDXL only")

    return pipe
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
def generate_posters() -> None:
    """Run the full two-stage pipeline for every entry in POSTERS.

    Stage 0 verifies the fonts, Stage 1 loads SDXL plus the Campus AI LoRA,
    and Stage 2 renders each artwork and composites the typography over it.
    Results land in output/test_generations/ as <slug>_artwork.png (raw SDXL
    output) and <slug>_poster.png (final composited poster).
    """
    results_dir = Path("output/test_generations")
    adapter_dir = "models/sdxl/checkpoints/campus_ai_poster_sdxl_phase3"
    adapter_file = "campus_ai_poster_sdxl_phase3.safetensors"
    model_id = "stabilityai/stable-diffusion-xl-base-1.0"

    results_dir.mkdir(parents=True, exist_ok=True)

    banner = "=" * 60
    print(banner)
    print(" CAMPUS AI — TWO-STAGE POSTER PIPELINE")
    print(banner)

    print("\n[Stage 0] Downloading / verifying fonts ...")
    ensure_fonts()

    print("\n[Stage 1] Loading SDXL + Campus AI LoRA ...")
    pipe = _load_pipeline(model_id, adapter_dir, adapter_file)

    print(f"\n[Stage 2] Generating {len(POSTERS)} posters ...\n")

    for slug, scene_prompt, overlay_cfg in POSTERS:
        heading = slug.upper().replace("_", " ")
        print(f" 🎨 {heading}")

        # Stage 1 output: pure visual artwork, text suppressed by _NEG.
        raw_image = pipe(
            scene_prompt,
            negative_prompt = _NEG,
            num_inference_steps = 35,  # +5 steps for cleaner detail
            guidance_scale = 7.5,      # stronger negative adherence — kills hallucinated text
        ).images[0]

        art_file = results_dir / f"{slug}_artwork.png"
        raw_image.save(art_file)
        print(f" artwork → {art_file}")

        # Stage 2: pixel-perfect typography via the PIL compositor.
        composed = composite_poster(raw_image, **overlay_cfg)
        final_file = results_dir / f"{slug}_poster.png"
        composed.save(final_file)
        print(f" poster → {final_file}\n")

    # Release the pipeline before the summary so VRAM is freed promptly.
    del pipe
    torch.cuda.empty_cache()

    print(banner)
    print(f" ✅ Done. All outputs in {results_dir}/")
    print(" *_artwork.png → raw SDXL art, no text")
    print(" *_poster.png → final composited poster")
    print(banner)
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
if __name__ == "__main__":
|
| 371 |
+
generate_posters()
|
scripts/tuning_dataset.py
ADDED
|
@@ -0,0 +1,518 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Tuning Dataset Builder
|
| 4 |
+
======================
|
| 5 |
+
Downloads high-quality poster images for Phase 3 fine-tuning.
|
| 6 |
+
Uses Google Custom Search (free tier) and Bing image search as fallback.
|
| 7 |
+
Images are saved into data/tuning/<category>/<subcategory>/.
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
python scripts/tuning_dataset.py
|
| 11 |
+
python scripts/tuning_dataset.py --per-category 20
|
| 12 |
+
python scripts/tuning_dataset.py --dry-run
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
import sys
|
| 17 |
+
import json
|
| 18 |
+
import time
|
| 19 |
+
import hashlib
|
| 20 |
+
import argparse
|
| 21 |
+
import re
|
| 22 |
+
import requests
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from urllib.parse import quote_plus
|
| 25 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# ============================================================================
|
| 29 |
+
# SEARCH QUERIES — curated for each of the 55 subcategories
|
| 30 |
+
# ============================================================================
|
| 31 |
+
|
| 32 |
+
CATEGORIES = {
|
| 33 |
+
# ---- College Events ----
|
| 34 |
+
"college_events/farewell": [
|
| 35 |
+
"college farewell party poster design HD",
|
| 36 |
+
"farewell event invitation poster aesthetic",
|
| 37 |
+
"farewell night celebration poster elegant design",
|
| 38 |
+
],
|
| 39 |
+
"college_events/freshers": [
|
| 40 |
+
"freshers party welcome poster design neon",
|
| 41 |
+
"freshers day college poster vibrant colorful",
|
| 42 |
+
"fresher welcome event poster creative",
|
| 43 |
+
],
|
| 44 |
+
"college_events/alumni_reunion": [
|
| 45 |
+
"alumni reunion event poster design elegant",
|
| 46 |
+
"alumni meet invitation poster university",
|
| 47 |
+
],
|
| 48 |
+
"college_events/graduation": [
|
| 49 |
+
"graduation ceremony poster elegant gold",
|
| 50 |
+
"convocation celebration poster university beautiful",
|
| 51 |
+
],
|
| 52 |
+
|
| 53 |
+
# ---- Cultural Fest ----
|
| 54 |
+
"cultural_fest/art_exhibition": [
|
| 55 |
+
"art exhibition poster design modern gallery",
|
| 56 |
+
"art gallery opening event poster minimal beautiful",
|
| 57 |
+
],
|
| 58 |
+
"cultural_fest/dance": [
|
| 59 |
+
"dance competition poster vibrant colorful HD",
|
| 60 |
+
"dance festival event poster aesthetic beautiful",
|
| 61 |
+
"classical dance performance poster Indian design",
|
| 62 |
+
],
|
| 63 |
+
"cultural_fest/drama_theatre": [
|
| 64 |
+
"theatre drama play poster artistic dark elegant",
|
| 65 |
+
"stage play event poster design creative",
|
| 66 |
+
],
|
| 67 |
+
"cultural_fest/fashion_show": [
|
| 68 |
+
"fashion show event poster elegant luxury design",
|
| 69 |
+
"college fashion gala poster premium aesthetic",
|
| 70 |
+
],
|
| 71 |
+
"cultural_fest/general": [
|
| 72 |
+
"cultural fest poster college India vibrant",
|
| 73 |
+
"annual cultural festival poster colorful design",
|
| 74 |
+
],
|
| 75 |
+
"cultural_fest/literary": [
|
| 76 |
+
"literary fest poster book reading event design",
|
| 77 |
+
"poetry slam event poster creative typography",
|
| 78 |
+
],
|
| 79 |
+
"cultural_fest/music": [
|
| 80 |
+
"music concert poster design neon glow HD",
|
| 81 |
+
"live music event poster rock band stage",
|
| 82 |
+
"college music festival poster vibrant DJ",
|
| 83 |
+
],
|
| 84 |
+
"cultural_fest/standup_comedy": [
|
| 85 |
+
"standup comedy show poster design microphone",
|
| 86 |
+
"open mic comedy night poster neon creative",
|
| 87 |
+
"comedy event poster funny stage spotlight",
|
| 88 |
+
],
|
| 89 |
+
|
| 90 |
+
# ---- Entertainment ----
|
| 91 |
+
"entertainment/food_fest": [
|
| 92 |
+
"food festival poster design appetizing HD",
|
| 93 |
+
"street food fest poster colorful delicious",
|
| 94 |
+
"college food carnival poster warm inviting",
|
| 95 |
+
],
|
| 96 |
+
"entertainment/gaming": [
|
| 97 |
+
"gaming tournament poster esports neon RGB",
|
| 98 |
+
"video game competition poster futuristic glowing",
|
| 99 |
+
],
|
| 100 |
+
"entertainment/movie_night": [
|
| 101 |
+
"movie night poster cinema event retro",
|
| 102 |
+
"outdoor movie screening poster vintage film",
|
| 103 |
+
],
|
| 104 |
+
|
| 105 |
+
# ---- Festivals ----
|
| 106 |
+
"festivals/christmas": [
|
| 107 |
+
"christmas celebration poster festive red green",
|
| 108 |
+
"merry christmas event poster elegant snowflakes",
|
| 109 |
+
],
|
| 110 |
+
"festivals/diwali": [
|
| 111 |
+
"diwali celebration poster beautiful golden diya HD",
|
| 112 |
+
"deepavali festival poster vibrant rangoli colors",
|
| 113 |
+
"diwali night event poster fireworks sparkle",
|
| 114 |
+
],
|
| 115 |
+
"festivals/durga_puja": [
|
| 116 |
+
"durga puja poster beautiful artistic HD",
|
| 117 |
+
"durga puja celebration poster traditional bengali",
|
| 118 |
+
],
|
| 119 |
+
"festivals/eid": [
|
| 120 |
+
"eid celebration poster beautiful crescent moon",
|
| 121 |
+
"eid mubarak event poster elegant islamic design",
|
| 122 |
+
],
|
| 123 |
+
"festivals/ganesh_chaturthi": [
|
| 124 |
+
"ganesh chaturthi poster design vibrant festival",
|
| 125 |
+
"ganpati celebration poster traditional colorful",
|
| 126 |
+
],
|
| 127 |
+
"festivals/holi": [
|
| 128 |
+
"holi festival poster colorful splash paint HD",
|
| 129 |
+
"holi celebration party poster vibrant gulal",
|
| 130 |
+
],
|
| 131 |
+
"festivals/independence_republic": [
|
| 132 |
+
"india independence day poster tricolor patriotic",
|
| 133 |
+
"republic day celebration poster 26 january",
|
| 134 |
+
],
|
| 135 |
+
"festivals/navratri_garba": [
|
| 136 |
+
"navratri garba poster design colorful dandiya",
|
| 137 |
+
"dandiya night event poster festive vibrant",
|
| 138 |
+
"garba raas festival poster traditional Gujarat",
|
| 139 |
+
],
|
| 140 |
+
"festivals/new_year": [
|
| 141 |
+
"new year celebration poster party fireworks",
|
| 142 |
+
"new year eve event poster glowing golden",
|
| 143 |
+
],
|
| 144 |
+
"festivals/onam": [
|
| 145 |
+
"onam festival poster kathakali traditional Kerala",
|
| 146 |
+
"onam celebration poster pookalam floral boat",
|
| 147 |
+
],
|
| 148 |
+
"festivals/pongal_sankranti": [
|
| 149 |
+
"pongal festival poster traditional Tamil Nadu",
|
| 150 |
+
"makar sankranti poster kite festival colorful",
|
| 151 |
+
],
|
| 152 |
+
|
| 153 |
+
# ---- Social ----
|
| 154 |
+
"social/awareness": [
|
| 155 |
+
"social awareness campaign poster design impactful",
|
| 156 |
+
"mental health awareness poster college creative",
|
| 157 |
+
],
|
| 158 |
+
"social/blood_donation": [
|
| 159 |
+
"blood donation camp poster design red heart",
|
| 160 |
+
"donate blood save lives poster minimal clean",
|
| 161 |
+
],
|
| 162 |
+
"social/charity": [
|
| 163 |
+
"charity event poster design heartfelt giving",
|
| 164 |
+
"fundraiser event poster college community",
|
| 165 |
+
],
|
| 166 |
+
"social/environment": [
|
| 167 |
+
"environment day poster tree planting green earth",
|
| 168 |
+
"eco friendly campaign poster sustainability",
|
| 169 |
+
],
|
| 170 |
+
|
| 171 |
+
# ---- Sports ----
|
| 172 |
+
"sports/athletics": [
|
| 173 |
+
"athletics sports day poster dynamic running",
|
| 174 |
+
"track and field event poster energy motion",
|
| 175 |
+
],
|
| 176 |
+
"sports/badminton_tennis": [
|
| 177 |
+
"badminton tournament poster design sports action",
|
| 178 |
+
"tennis competition poster athletic dynamic",
|
| 179 |
+
],
|
| 180 |
+
"sports/basketball": [
|
| 181 |
+
"basketball tournament poster dynamic slam dunk HD",
|
| 182 |
+
"basketball championship poster sports energy",
|
| 183 |
+
],
|
| 184 |
+
"sports/cricket": [
|
| 185 |
+
"cricket tournament poster design India stadium HD",
|
| 186 |
+
"cricket match poster IPL style vibrant action",
|
| 187 |
+
"cricket championship poster batsman dynamic",
|
| 188 |
+
],
|
| 189 |
+
"sports/esports": [
|
| 190 |
+
"esports tournament poster gaming neon cyberpunk",
|
| 191 |
+
"valorant tournament poster aggressive design",
|
| 192 |
+
"gaming championship poster RGB glowing dark",
|
| 193 |
+
],
|
| 194 |
+
"sports/football": [
|
| 195 |
+
"football tournament poster design action dynamic",
|
| 196 |
+
"soccer championship event poster stadium energy",
|
| 197 |
+
],
|
| 198 |
+
"sports/general": [
|
| 199 |
+
"sports day poster college event medals trophy",
|
| 200 |
+
"annual sports meet poster design vibrant",
|
| 201 |
+
],
|
| 202 |
+
"sports/kabaddi_kho": [
|
| 203 |
+
"kabaddi tournament poster Indian sports action",
|
| 204 |
+
"kho kho competition poster dynamic traditional",
|
| 205 |
+
],
|
| 206 |
+
"sports/yoga_fitness": [
|
| 207 |
+
"yoga day poster peaceful sunrise meditation",
|
| 208 |
+
"fitness challenge poster gym workout energy",
|
| 209 |
+
],
|
| 210 |
+
|
| 211 |
+
# ---- Styles ----
|
| 212 |
+
"styles/3d_futuristic": [
|
| 213 |
+
"futuristic 3D poster design abstract technology",
|
| 214 |
+
"3D event poster sci-fi hologram aesthetic",
|
| 215 |
+
],
|
| 216 |
+
"styles/dark_theme": [
|
| 217 |
+
"dark theme poster design moody elegant",
|
| 218 |
+
"dark aesthetic event poster premium black gold",
|
| 219 |
+
],
|
| 220 |
+
"styles/gradient": [
|
| 221 |
+
"gradient poster design smooth mesh colors",
|
| 222 |
+
"gradient background poster modern vibrant",
|
| 223 |
+
],
|
| 224 |
+
"styles/illustration": [
|
| 225 |
+
"illustrated event poster hand drawn artistic",
|
| 226 |
+
"illustration poster design flat vector creative",
|
| 227 |
+
],
|
| 228 |
+
"styles/minimalist": [
|
| 229 |
+
"minimalist poster design clean modern white",
|
| 230 |
+
"minimal event poster elegant white space",
|
| 231 |
+
],
|
| 232 |
+
"styles/neon_glow": [
|
| 233 |
+
"neon glow poster design vibrant dark",
|
| 234 |
+
"neon lights event poster cyberpunk glowing",
|
| 235 |
+
],
|
| 236 |
+
"styles/retro_vintage": [
|
| 237 |
+
"retro vintage poster design grunge old school",
|
| 238 |
+
"vintage event poster classic typography worn",
|
| 239 |
+
],
|
| 240 |
+
"styles/typography": [
|
| 241 |
+
"typography poster design bold text art creative",
|
| 242 |
+
"typographic event poster lettering experimental",
|
| 243 |
+
],
|
| 244 |
+
"styles/watercolor": [
|
| 245 |
+
"watercolor poster design soft artistic floral",
|
| 246 |
+
"watercolor painting poster pastel dreamy",
|
| 247 |
+
],
|
| 248 |
+
|
| 249 |
+
# ---- Tech Fest ----
|
| 250 |
+
"tech_fest/ai_ml": [
|
| 251 |
+
"AI machine learning event poster futuristic neural",
|
| 252 |
+
"artificial intelligence conference poster technology",
|
| 253 |
+
],
|
| 254 |
+
"tech_fest/coding_competition": [
|
| 255 |
+
"coding competition poster hacker developer dark",
|
| 256 |
+
"code challenge event poster programming terminal",
|
| 257 |
+
],
|
| 258 |
+
"tech_fest/cybersecurity": [
|
| 259 |
+
"cybersecurity event poster hacker CTF dark",
|
| 260 |
+
"cyber security awareness poster digital lock",
|
| 261 |
+
],
|
| 262 |
+
"tech_fest/general": [
|
| 263 |
+
"tech fest poster college futuristic innovation",
|
| 264 |
+
"technology festival poster digital modern",
|
| 265 |
+
],
|
| 266 |
+
"tech_fest/hackathon": [
|
| 267 |
+
"hackathon event poster design code developer",
|
| 268 |
+
"36 hour hackathon poster startup tech vibrant",
|
| 269 |
+
"hack day poster creative developer community",
|
| 270 |
+
],
|
| 271 |
+
"tech_fest/robotics": [
|
| 272 |
+
"robotics competition poster futuristic mechanical",
|
| 273 |
+
"robot challenge event poster technology modern",
|
| 274 |
+
],
|
| 275 |
+
"tech_fest/web_app_dev": [
|
| 276 |
+
"web development workshop poster modern code",
|
| 277 |
+
"app development event poster mobile technology",
|
| 278 |
+
],
|
| 279 |
+
|
| 280 |
+
# ---- Workshops ----
|
| 281 |
+
"workshops/business": [
|
| 282 |
+
"business workshop poster corporate professional",
|
| 283 |
+
"entrepreneurship event poster startup modern",
|
| 284 |
+
],
|
| 285 |
+
"workshops/coding": [
|
| 286 |
+
"coding workshop poster developer bootcamp",
|
| 287 |
+
"programming workshop poster technology education",
|
| 288 |
+
],
|
| 289 |
+
"workshops/conference": [
|
| 290 |
+
"conference event poster professional academic",
|
| 291 |
+
"academic conference poster modern clean",
|
| 292 |
+
],
|
| 293 |
+
"workshops/design": [
|
| 294 |
+
"design workshop poster UI UX creative",
|
| 295 |
+
"graphic design event poster artistic colorful",
|
| 296 |
+
],
|
| 297 |
+
"workshops/placement": [
|
| 298 |
+
"placement drive poster campus recruitment",
|
| 299 |
+
"career fair poster professional job event",
|
| 300 |
+
],
|
| 301 |
+
"workshops/seminar": [
|
| 302 |
+
"seminar event poster professional academic clean",
|
| 303 |
+
"guest lecture poster university speaker modern",
|
| 304 |
+
],
|
| 305 |
+
"workshops/soft_skills": [
|
| 306 |
+
"soft skills workshop poster leadership training",
|
| 307 |
+
"communication skills event poster professional",
|
| 308 |
+
],
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
# ============================================================================
|
| 313 |
+
# IMAGE SEARCH ENGINE (DuckDuckGo — no API key needed)
|
| 314 |
+
# ============================================================================
|
| 315 |
+
|
| 316 |
+
def search_images(query, max_results=8):
    """Search DuckDuckGo Images for *query* and return a list of image URLs.

    DuckDuckGo requires a per-session "vqd" token, scraped from the HTML
    search page, before the JSON image endpoint (i.js) can be queried.

    Args:
        query: Free-text search string.
        max_results: Maximum number of URLs to return.

    Returns:
        List of image URLs (possibly empty). Never raises — any network or
        parse failure yields an empty list.
    """
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        )
    }

    try:
        # Step 1: load the search page to obtain the vqd session token.
        resp = requests.get(
            f"https://duckduckgo.com/?q={quote_plus(query)}&iax=images&ia=images",
            headers=headers, timeout=10
        )
        # A single pattern covers both the bare (vqd=123-456) and quoted
        # (vqd='123-456' / vqd="123-456") token forms, so the old two-pass
        # extraction was redundant.
        match = re.search(r"vqd=['\"]?([\d-]+)", resp.text)
        if not match:
            return []
        vqd = match.group(1)

        # Step 2: fetch the JSON image results.
        params = {
            "l": "us-en", "o": "json", "q": query,
            "vqd": vqd, "f": ",,,,,", "p": "1",
        }
        resp = requests.get(
            "https://duckduckgo.com/i.js",
            headers=headers, params=params, timeout=10
        )
        data = resp.json()

        urls = []
        # Scan up to 2x the target so undersized images can be filtered out.
        for result in data.get("results", [])[:max_results * 2]:
            url = result.get("image", "")
            if url and url.startswith("http"):
                # Prefer images at least 400x400; accept a few smaller ones
                # only while results are scarce.
                width = result.get("width", 0)
                height = result.get("height", 0)
                if width >= 400 and height >= 400:
                    urls.append(url)
                elif len(urls) < max_results // 2:
                    urls.append(url)  # Accept smaller ones if few results
                if len(urls) >= max_results:
                    break

        return urls[:max_results]

    except Exception:
        # Best-effort scraper: callers treat [] as "no results".
        return []
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
# ============================================================================
|
| 376 |
+
# IMAGE DOWNLOADER with validation
|
| 377 |
+
# ============================================================================
|
| 378 |
+
|
| 379 |
+
def download_image(url, save_path, min_size_kb=15, timeout=12):
    """Download and validate a single image file.

    Rejects non-image responses, payloads smaller than *min_size_kb* KB,
    and payloads whose magic bytes do not match a known image format.

    Args:
        url: Direct image URL.
        save_path: Destination file path.
        min_size_kb: Minimum acceptable payload size in kilobytes.
        timeout: Request timeout in seconds.

    Returns:
        True when the image was validated and written, False otherwise.
        Never raises — any network or filesystem error returns False.
    """
    try:
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36"
            )
        }
        # NOTE: the whole body is read via resp.content below, so there is
        # no point in requesting a streamed response (stream=True removed).
        resp = requests.get(url, headers=headers, timeout=timeout)
        resp.raise_for_status()

        # Accept image content types, or a URL with an image extension —
        # some hosts serve images with a generic Content-Type.
        content_type = resp.headers.get("Content-Type", "")
        if "image" not in content_type and not url.lower().endswith(
            (".jpg", ".jpeg", ".png", ".webp")
        ):
            return False

        data = resp.content

        # Skip tiny/broken images
        if len(data) < min_size_kb * 1024:
            return False

        # Magic-byte check — verify it's actually an image. A bare RIFF
        # header also matches AVI/WAV containers, so WebP is confirmed via
        # the 'WEBP' fourcc at byte offset 8.
        is_image = (
            data[:2] == b'\xff\xd8' or                           # JPEG
            data[:4] == b'\x89PNG' or                            # PNG
            (data[:4] == b'RIFF' and data[8:12] == b'WEBP') or   # WebP
            data[:3] == b'GIF'                                   # GIF
        )
        if not is_image:
            return False

        with open(save_path, "wb") as f:
            f.write(data)
        return True

    except Exception:
        return False
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
def get_filename(url, folder):
    """Generate a unique, deterministic filename from the URL."""
    # Hash the URL so the same image always maps to the same file name,
    # which lets re-runs skip already-downloaded images.
    digest = hashlib.md5(url.encode()).hexdigest()
    return os.path.join(folder, "tuning_{}.jpg".format(digest[:12]))
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
# ============================================================================
|
| 425 |
+
# MAIN PIPELINE
|
| 426 |
+
# ============================================================================
|
| 427 |
+
|
| 428 |
+
def main():
    """CLI entry point: search for and download poster images per subcategory.

    Iterates every subcategory in CATEGORIES, runs its search queries
    against DuckDuckGo, downloads up to --per-category new images into
    <target>/<subcategory>/, and prints a run summary.
    """
    parser = argparse.ArgumentParser(
        description="Tuning Dataset Builder — download fresh poster images for Phase 3"
    )
    parser.add_argument("--target", default="data/tuning",
                        help="Root directory to save images into")
    parser.add_argument("--per-category", type=int, default=15,
                        help="Target new images per subcategory")
    parser.add_argument("--dry-run", action="store_true",
                        help="Preview searches without downloading")
    args = parser.parse_args()

    total_cats = len(CATEGORIES)
    print("=" * 60)
    print(" TUNING DATASET BUILDER — Phase 3")
    print("=" * 60)
    print(f" Target folder : {args.target}")
    print(f" Per subcategory: {args.per_category} images")
    print(f" Subcategories : {total_cats}")
    print(f" Est. total : ~{total_cats * args.per_category} images")
    print("=" * 60)

    stats = {"downloaded": 0, "skipped": 0, "failed": 0}

    for i, (subcat, queries) in enumerate(CATEGORIES.items(), 1):
        folder = os.path.join(args.target, subcat)

        # Count already-downloaded images; the folder may not exist yet
        # (it is only created once we actually download into it).
        if os.path.isdir(folder):
            existing = len([f for f in os.listdir(folder)
                            if f.lower().endswith((".jpg", ".jpeg", ".png", ".webp"))])
        else:
            existing = 0

        print(f"\n[{i:02d}/{total_cats}] 📁 {subcat} ({existing} existing)")

        if args.dry_run:
            # Fix: a dry run must not touch the filesystem — previously the
            # category folders were still created before this early-out.
            for q in queries:
                print(f" 🔍 Would search: '{q}'")
            continue

        os.makedirs(folder, exist_ok=True)

        downloaded = 0
        # Spread the per-category budget across the queries, ceiling-divided,
        # with a floor of 3 per query.
        per_query = max(3, (args.per_category + len(queries) - 1) // len(queries))

        for query in queries:
            if downloaded >= args.per_category:
                break

            print(f" 🔍 '{query}'")
            urls = search_images(query, max_results=per_query + 3)

            if not urls:
                print(f" ⚠️ No results")
                continue

            for url in urls:
                if downloaded >= args.per_category:
                    break

                # Deterministic hash-based names make re-runs idempotent.
                filepath = get_filename(url, folder)
                if os.path.exists(filepath):
                    stats["skipped"] += 1
                    continue

                if download_image(url, filepath):
                    downloaded += 1
                    stats["downloaded"] += 1
                    print(f" ✅ {downloaded}/{args.per_category}")
                else:
                    stats["failed"] += 1

            # Rate limit — be respectful
            time.sleep(1.5)

        print(f" → {downloaded} new images saved")

    # ---- Summary ----
    print("\n" + "=" * 60)
    print(" DOWNLOAD COMPLETE")
    print("=" * 60)
    print(f" ✅ Downloaded : {stats['downloaded']}")
    print(f" ⏭️ Skipped : {stats['skipped']} (duplicates)")
    print(f" ❌ Failed : {stats['failed']}")
    print("=" * 60)
    print("\n Next steps:")
    print(" 1. Caption the new images:")
    print(" python scripts/caption_generator.py --input data/tuning")
    print(" 2. Run Phase 3 training:")
    print(" cd ai-toolkit && python run.py ../configs/train_sdxl_lora_phase3.yaml")
    print()
|
| 515 |
+
|
| 516 |
+
|
| 517 |
+
# Script entry point — run the full download pipeline when executed directly.
if __name__ == "__main__":
    main()
|