# 🪄 Affordance-based Novel Concept Generator (Kandinsky-3 Fine-Tuned)

This is a fine-tuned version of the **Kandinsky-3** text-to-image pipeline, designed to generate **novel object and furniture concepts** by combining affordance-driven functionalities (e.g., "sofa + bed + cargo + bicycle").

---

## 🚀 How to Use

```python
import os
import sys

import torch

# Add kandinsky3 to Python path (must happen before the kandinsky3 import below)
sys.path.append('..')

from kandinsky3 import get_T2I_pipeline, get_T2I_Flash_pipeline

# Set device and dtype maps
device_map = torch.device('cuda:0')
dtype_map = {
    'unet': torch.float32,
    'text_encoder': torch.float32,
    'movq': torch.float32,
}

# Load the Flash text-to-image pipeline
t2i_pipe = get_T2I_Flash_pipeline(
    device_map=device_map,
    dtype_map=dtype_map,
    cache_dir="./cache/"
)

# Load fine-tuned UNet weights
t2i_pipe.unet.load_state_dict(torch.load(
    "unet_model_checkpoint.pt",
    map_location=device_map
))

# Generate image from prompt
res = t2i_pipe(
    text="a new furniture design that has functions from sofa, bed, cargo, bicycle",
    steps=50
)[0]

# Save the result
res.save("generated_image.jpg")
```