768
Browse files
- media/result_grid.jpg +2 -2
- pipeline_sdxs.py +8 -13
- samples/unet_384x768_0.jpg +2 -2
- samples/unet_416x768_0.jpg +2 -2
- samples/unet_448x768_0.jpg +2 -2
- samples/unet_480x768_0.jpg +2 -2
- samples/unet_512x768_0.jpg +2 -2
- samples/unet_544x768_0.jpg +2 -2
- samples/unet_576x768_0.jpg +2 -2
- samples/unet_608x768_0.jpg +2 -2
- samples/unet_640x768_0.jpg +2 -2
- samples/unet_672x768_0.jpg +2 -2
- samples/unet_704x768_0.jpg +2 -2
- samples/unet_736x768_0.jpg +2 -2
- samples/unet_768x384_0.jpg +2 -2
- samples/unet_768x416_0.jpg +2 -2
- samples/unet_768x448_0.jpg +2 -2
- samples/unet_768x480_0.jpg +2 -2
- samples/unet_768x512_0.jpg +2 -2
- samples/unet_768x544_0.jpg +2 -2
- samples/unet_768x576_0.jpg +2 -2
- samples/unet_768x608_0.jpg +2 -2
- samples/unet_768x640_0.jpg +2 -2
- samples/unet_768x672_0.jpg +2 -2
- samples/unet_768x704_0.jpg +2 -2
- samples/unet_768x736_0.jpg +2 -2
- samples/unet_768x768_0.jpg +2 -2
- test.ipynb +2 -2
- train.py +6 -5
- unet/config.json +2 -2
- unet/diffusion_pytorch_model.safetensors +2 -2
media/result_grid.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
pipeline_sdxs.py
CHANGED
|
@@ -18,17 +18,13 @@ class SdxsPipeline(DiffusionPipeline):
|
|
| 18 |
|
| 19 |
# Шаблон промпта по умолчанию
|
| 20 |
DEFAULT_REFINE_TEMPLATE = (
|
| 21 |
-
"You are a visionary artist
|
| 22 |
-
|
| 23 |
-
"
|
| 24 |
-
"
|
| 25 |
-
" Any trace of ambiguity or metaphor makes you deeply uncomfortable. "
|
| 26 |
-
"Your final description must be objective and concrete. "
|
| 27 |
-
"Output only the final revised prompt on english strictly—do not output anything else.\n"
|
| 28 |
-
#Preserve the original subject and intent. Output **only** the final revised prompt in **English**, with absolutely no commentary, thinking text, or additional characters.
|
| 29 |
"User input prompt: {prompt}"
|
| 30 |
)
|
| 31 |
-
|
| 32 |
def __init__(self, vae, text_encoder, tokenizer, unet, scheduler, max_length: int = 192):
|
| 33 |
super().__init__()
|
| 34 |
self.register_modules(
|
|
@@ -167,8 +163,8 @@ class SdxsPipeline(DiffusionPipeline):
|
|
| 167 |
text_embeddings,
|
| 168 |
attention_mask,
|
| 169 |
pooled_embeddings,
|
| 170 |
-
height: int =
|
| 171 |
-
width: int =
|
| 172 |
num_inference_steps: int = 40,
|
| 173 |
guidance_scale: float = 4.0,
|
| 174 |
latent_channels: int = 16,
|
|
@@ -262,7 +258,7 @@ class SdxsPipeline(DiffusionPipeline):
|
|
| 262 |
seed: Optional[int] = None,
|
| 263 |
negative_prompt: Optional[Union[str, List[str]]] = None,
|
| 264 |
text_embeddings: Optional[torch.FloatTensor] = None,
|
| 265 |
-
refine_prompt: bool =
|
| 266 |
refine_template: Optional[str] = None,
|
| 267 |
):
|
| 268 |
device = self.device
|
|
@@ -280,7 +276,6 @@ class SdxsPipeline(DiffusionPipeline):
|
|
| 280 |
|
| 281 |
refined_list = []
|
| 282 |
|
| 283 |
-
print("Refining prompt...")
|
| 284 |
for p in original_prompts:
|
| 285 |
# 1.1. Форматирование промпта по правилам Qwen
|
| 286 |
messages = [
|
|
|
|
| 18 |
|
| 19 |
# Шаблон промпта по умолчанию
|
| 20 |
DEFAULT_REFINE_TEMPLATE = (
|
| 21 |
+
"You are a visionary artist with prompt engineer skills for a text-to-image AI. "
|
| 22 |
+
"Your mind overflows with poetry and distant horizons, yet your hands compulsively work to transform the user's input into a detailed, objective, and aesthetically optimized visual description. "
|
| 23 |
+
"Preserve the original subject and intent, but final description must be three sentences. "
|
| 24 |
+
"Output **only** the final revised prompt in **English**, with absolutely no commentary or thinking text.\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"User input prompt: {prompt}"
|
| 26 |
)
|
| 27 |
+
#User input prompt: {prompt}
|
| 28 |
def __init__(self, vae, text_encoder, tokenizer, unet, scheduler, max_length: int = 192):
|
| 29 |
super().__init__()
|
| 30 |
self.register_modules(
|
|
|
|
| 163 |
text_embeddings,
|
| 164 |
attention_mask,
|
| 165 |
pooled_embeddings,
|
| 166 |
+
height: int = 1536,
|
| 167 |
+
width: int = 1280,
|
| 168 |
num_inference_steps: int = 40,
|
| 169 |
guidance_scale: float = 4.0,
|
| 170 |
latent_channels: int = 16,
|
|
|
|
| 258 |
seed: Optional[int] = None,
|
| 259 |
negative_prompt: Optional[Union[str, List[str]]] = None,
|
| 260 |
text_embeddings: Optional[torch.FloatTensor] = None,
|
| 261 |
+
refine_prompt: bool = True,
|
| 262 |
refine_template: Optional[str] = None,
|
| 263 |
):
|
| 264 |
device = self.device
|
|
|
|
| 276 |
|
| 277 |
refined_list = []
|
| 278 |
|
|
|
|
| 279 |
for p in original_prompts:
|
| 280 |
# 1.1. Форматирование промпта по правилам Qwen
|
| 281 |
messages = [
|
samples/unet_384x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_416x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_448x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_480x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_512x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_544x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_576x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_608x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_640x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_672x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_704x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_736x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x384_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x416_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x448_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x480_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x512_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x544_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x576_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x608_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x640_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x672_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x704_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x736_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
samples/unet_768x768_0.jpg
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
test.ipynb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b39ff79167169c2330f233ed5c60a768b255e0268c4abf4ae930d9fa884c6547
|
| 3 |
+
size 1730113
|
train.py
CHANGED
|
@@ -28,7 +28,7 @@ from transformers import AutoTokenizer, AutoModel
|
|
| 28 |
# --------------------------- Параметры ---------------------------
|
| 29 |
ds_path = "/workspace/sdxs/datasets/768"
|
| 30 |
project = "unet"
|
| 31 |
-
batch_size =
|
| 32 |
base_learning_rate = 3e-5
|
| 33 |
min_learning_rate = 2.5e-5
|
| 34 |
num_epochs = 10
|
|
@@ -48,9 +48,9 @@ comet_ml_api_key = "Agctp26mbqnoYrrlvQuKSTk6r"
|
|
| 48 |
comet_ml_workspace = "recoilme"
|
| 49 |
torch.backends.cuda.matmul.allow_tf32 = True
|
| 50 |
torch.backends.cudnn.allow_tf32 = True
|
| 51 |
-
torch.backends.cuda.enable_mem_efficient_sdp(False)
|
| 52 |
dtype = torch.float32
|
| 53 |
-
save_barrier = 1.
|
| 54 |
warmup_percent = 0.01
|
| 55 |
percentile_clipping = 98
|
| 56 |
betta2 = 0.998
|
|
@@ -58,7 +58,7 @@ eps = 1e-6
|
|
| 58 |
clip_grad_norm = 1.0
|
| 59 |
limit = 0
|
| 60 |
checkpoints_folder = ""
|
| 61 |
-
mixed_precision = "
|
| 62 |
gradient_accumulation_steps = 1
|
| 63 |
|
| 64 |
accelerator = Accelerator(
|
|
@@ -68,7 +68,7 @@ accelerator = Accelerator(
|
|
| 68 |
device = accelerator.device
|
| 69 |
|
| 70 |
# Параметры для диффузии
|
| 71 |
-
n_diffusion_steps =
|
| 72 |
samples_to_generate = 12
|
| 73 |
guidance_scale = 4
|
| 74 |
|
|
@@ -307,6 +307,7 @@ def collate_fn_simple(batch):
|
|
| 307 |
raw_texts = [item["text"] for item in batch]
|
| 308 |
texts = [
|
| 309 |
"" if t.lower().startswith("zero")
|
|
|
|
| 310 |
else t[1:].lstrip() if t.startswith(".")
|
| 311 |
else t
|
| 312 |
for t in raw_texts
|
|
|
|
| 28 |
# --------------------------- Параметры ---------------------------
|
| 29 |
ds_path = "/workspace/sdxs/datasets/768"
|
| 30 |
project = "unet"
|
| 31 |
+
batch_size = 256
|
| 32 |
base_learning_rate = 3e-5
|
| 33 |
min_learning_rate = 2.5e-5
|
| 34 |
num_epochs = 10
|
|
|
|
| 48 |
comet_ml_workspace = "recoilme"
|
| 49 |
torch.backends.cuda.matmul.allow_tf32 = True
|
| 50 |
torch.backends.cudnn.allow_tf32 = True
|
| 51 |
+
#torch.backends.cuda.enable_mem_efficient_sdp(False)
|
| 52 |
dtype = torch.float32
|
| 53 |
+
save_barrier = 1.005
|
| 54 |
warmup_percent = 0.01
|
| 55 |
percentile_clipping = 98
|
| 56 |
betta2 = 0.998
|
|
|
|
| 58 |
clip_grad_norm = 1.0
|
| 59 |
limit = 0
|
| 60 |
checkpoints_folder = ""
|
| 61 |
+
mixed_precision = "no"
|
| 62 |
gradient_accumulation_steps = 1
|
| 63 |
|
| 64 |
accelerator = Accelerator(
|
|
|
|
| 68 |
device = accelerator.device
|
| 69 |
|
| 70 |
# Параметры для диффузии
|
| 71 |
+
n_diffusion_steps = 40
|
| 72 |
samples_to_generate = 12
|
| 73 |
guidance_scale = 4
|
| 74 |
|
|
|
|
| 307 |
raw_texts = [item["text"] for item in batch]
|
| 308 |
texts = [
|
| 309 |
"" if t.lower().startswith("zero")
|
| 310 |
+
else "" if random.random() < 0.05
|
| 311 |
else t[1:].lstrip() if t.startswith(".")
|
| 312 |
else t
|
| 313 |
for t in raw_texts
|
unet/config.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:987ce2ea59106bf0806b16c21fff90d6fe6e7a90101e0157d8295803dbf34f2b
|
| 3 |
+
size 1892
|
unet/diffusion_pytorch_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3b7b2f8f401c5b9536cc3f629d898ed43e71f05c65aab9e48215003b8c618fd
|
| 3 |
+
size 3302474272
|