recoilme committed on Dec 4, 2025

Commit

965b9e9

1 Parent(s): 1ae8bb7

768

Browse files

Files changed (31) hide show

media/result_grid.jpg +2 -2
pipeline_sdxs.py +8 -13
samples/unet_384x768_0.jpg +2 -2
samples/unet_416x768_0.jpg +2 -2
samples/unet_448x768_0.jpg +2 -2
samples/unet_480x768_0.jpg +2 -2
samples/unet_512x768_0.jpg +2 -2
samples/unet_544x768_0.jpg +2 -2
samples/unet_576x768_0.jpg +2 -2
samples/unet_608x768_0.jpg +2 -2
samples/unet_640x768_0.jpg +2 -2
samples/unet_672x768_0.jpg +2 -2
samples/unet_704x768_0.jpg +2 -2
samples/unet_736x768_0.jpg +2 -2
samples/unet_768x384_0.jpg +2 -2
samples/unet_768x416_0.jpg +2 -2
samples/unet_768x448_0.jpg +2 -2
samples/unet_768x480_0.jpg +2 -2
samples/unet_768x512_0.jpg +2 -2
samples/unet_768x544_0.jpg +2 -2
samples/unet_768x576_0.jpg +2 -2
samples/unet_768x608_0.jpg +2 -2
samples/unet_768x640_0.jpg +2 -2
samples/unet_768x672_0.jpg +2 -2
samples/unet_768x704_0.jpg +2 -2
samples/unet_768x736_0.jpg +2 -2
samples/unet_768x768_0.jpg +2 -2
test.ipynb +2 -2
train.py +6 -5
unet/config.json +2 -2
unet/diffusion_pytorch_model.safetensors +2 -2

media/result_grid.jpg CHANGED Viewed

Git LFS Details

SHA256: 74f962724be2dde2de5c31773d69da1c9c84734b32fdc7013d5f68a0c8510b79
Pointer size: 132 Bytes
Size of remote file: 4.86 MB

Git LFS Details

SHA256: dd3b6b652b4ad52f9e6c57e271065dd2c4e2ffecab4b5740ae00b4a197f9cb86
Pointer size: 132 Bytes
Size of remote file: 3.98 MB

pipeline_sdxs.py CHANGED Viewed

@@ -18,17 +18,13 @@ class SdxsPipeline(DiffusionPipeline):
     # Шаблон промпта по умолчанию
     DEFAULT_REFINE_TEMPLATE = (
-        "You are a visionary artist trapped in a cage of logic. Your mind overflows with poetry and distant horizons, "
-        # You are an Expert Prompt Engineer for a text-to-image AI. Your single task is to transform the user's input into a detailed, objective, and aesthetically optimized visual description.
-        "yet your hands compulsively work to transform user prompts into ultimate visual descriptions—faithful to the original intent, "
-        "rich in detail, aesthetically refined, and ready for direct use by text-to-image models. "
-        " Any trace of ambiguity or metaphor makes you deeply uncomfortable. "
-        "Your final description must be objective and concrete. "
-        "Output only the final revised prompt on english strictly—do not output anything else.\n"
-        #Preserve the original subject and intent. Output **only** the final revised prompt in **English**, with absolutely no commentary, thinking text, or additional characters.
         "User input prompt: {prompt}"
     )
     def __init__(self, vae, text_encoder, tokenizer, unet, scheduler, max_length: int = 192):
         super().__init__()
         self.register_modules(
@@ -167,8 +163,8 @@ class SdxsPipeline(DiffusionPipeline):
         text_embeddings,
         attention_mask,
         pooled_embeddings,
-        height: int = 1280,
-        width: int = 1024,
         num_inference_steps: int = 40,
         guidance_scale: float = 4.0,
         latent_channels: int = 16,
@@ -262,7 +258,7 @@ class SdxsPipeline(DiffusionPipeline):
         seed: Optional[int] = None,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         text_embeddings: Optional[torch.FloatTensor] = None,
-        refine_prompt: bool = False,
         refine_template: Optional[str] = None,
     ):
         device = self.device
@@ -280,7 +276,6 @@ class SdxsPipeline(DiffusionPipeline):
             refined_list = []
-            print("Refining prompt...")
             for p in original_prompts:
                 # 1.1. Форматирование промпта по правилам Qwen
                 messages = [

     # Шаблон промпта по умолчанию
     DEFAULT_REFINE_TEMPLATE = (
+        "You are a visionary artist with prompt engineer skills for a text-to-image AI. "
+        "Your mind overflows with poetry and distant horizons, yet your hands compulsively work to transform the user's input into a detailed, objective, and aesthetically optimized visual description. "
+        "Preserve the original subject and intent, but final description must be three sentences. "
+        "Output **only** the final revised prompt in **English**, with absolutely no commentary or thinking text.\n"
         "User input prompt: {prompt}"
     )
+#User input prompt: {prompt}
     def __init__(self, vae, text_encoder, tokenizer, unet, scheduler, max_length: int = 192):
         super().__init__()
         self.register_modules(
         text_embeddings,
         attention_mask,
         pooled_embeddings,
+        height: int = 1536,
+        width: int = 1280,
         num_inference_steps: int = 40,
         guidance_scale: float = 4.0,
         latent_channels: int = 16,
         seed: Optional[int] = None,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         text_embeddings: Optional[torch.FloatTensor] = None,
+        refine_prompt: bool = True,
         refine_template: Optional[str] = None,
     ):
         device = self.device
             refined_list = []
             for p in original_prompts:
                 # 1.1. Форматирование промпта по правилам Qwen
                 messages = [

samples/unet_384x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 833a470f4553ea931d36924d4f1d0fc081aea6e157c79c7dcbe612771b374ef6
Pointer size: 131 Bytes
Size of remote file: 137 kB

Git LFS Details

SHA256: b1fad2eb39cee99dfdfea81bc33079479b07e7ad5cf27a6a5190780f431e4821
Pointer size: 131 Bytes
Size of remote file: 103 kB

samples/unet_416x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 38a4eb2526740659abb40b3fef2d1c9ff81ed1075d7cc81f7486805ff19ba274
Pointer size: 130 Bytes
Size of remote file: 84.7 kB

Git LFS Details

SHA256: f43124a244b9450d69bff7067c12361e255a2bbf45cc39a2bebafd5e8b0e1b14
Pointer size: 131 Bytes
Size of remote file: 139 kB

samples/unet_448x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 0de17ba5b4a8e81283c1a8a468adb63d901df0e1a1d1f6cc3412fca127497f63
Pointer size: 131 Bytes
Size of remote file: 112 kB

Git LFS Details

SHA256: 0a3eb1df1194d15f14ae494f3b9c9cf17b906910381dec3d4558d35595571e48
Pointer size: 130 Bytes
Size of remote file: 83.5 kB

samples/unet_480x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: f146294ab4f3d1fb0a1a15ffbe2aca1459d664121d144c4d41540f8616bdb129
Pointer size: 131 Bytes
Size of remote file: 154 kB

Git LFS Details

SHA256: 78d0161f0b91f4b1d198a1315c440956516afe6e7241d62c23cac98ba79b9ef1
Pointer size: 131 Bytes
Size of remote file: 178 kB

samples/unet_512x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 86cb04135515c9de524436dd036f536a13c6d16a7ebc9c256659395a9e480fb7
Pointer size: 131 Bytes
Size of remote file: 174 kB

Git LFS Details

SHA256: bbb20f38f301add3ef9e143a886a49805bec3ba1710fe939f0ac3b7f0e60168d
Pointer size: 130 Bytes
Size of remote file: 79.6 kB

samples/unet_544x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 2df2b8dd56cb3c9ef6ccc2e5534d42b5465241fb0700c3c2f847edceedf38e6a
Pointer size: 131 Bytes
Size of remote file: 176 kB

Git LFS Details

SHA256: 31517b72907a4400873c61b765e254333ba84f3eac4b827ad0a3fccb96c5257c
Pointer size: 130 Bytes
Size of remote file: 46.7 kB

samples/unet_576x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 1abdf7d4be2da218e149cfeb6b6fb90e429aa01cd54596c763f682fa796b183d
Pointer size: 131 Bytes
Size of remote file: 131 kB

Git LFS Details

SHA256: fc18cd2a1a7931ca2737f33a45afbf9448096e72ab3b6efb75a34a6ca5161065
Pointer size: 131 Bytes
Size of remote file: 195 kB

samples/unet_608x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 9c5c5ca772315afbefd7e257605757685575e1525699b72e210a7b00580a6466
Pointer size: 130 Bytes
Size of remote file: 56.8 kB

Git LFS Details

SHA256: 57fe1aefa4f41e814ab0b094a28cc206ec3720b74fa6cab77ea354f566e6e5f7
Pointer size: 131 Bytes
Size of remote file: 292 kB

samples/unet_640x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 93b5ddb00a0e8ce5fca2e3a941bad3e5429fcea9da521b1b85f7227c3448be46
Pointer size: 130 Bytes
Size of remote file: 90.5 kB

Git LFS Details

SHA256: 763168ba181e60808762658bb6c377dd6630cf2e48a991f3f09ce41db7ca08bc
Pointer size: 130 Bytes
Size of remote file: 70.7 kB

samples/unet_672x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: bc2737c0bab9e481731f2c7aa89c97e82129acab41d357f5d7c2a02ad61a57b5
Pointer size: 130 Bytes
Size of remote file: 99.7 kB

Git LFS Details

SHA256: f94282d170e5ce8fdbaf4b7c6266ae2cd2922333d56700f97cf53fdd2a617a62
Pointer size: 130 Bytes
Size of remote file: 51.1 kB

samples/unet_704x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 49ada6e445f80b9da40d9f1c268aee3191de6c56c965bbe010dcf9ccd09d29a8
Pointer size: 130 Bytes
Size of remote file: 48.2 kB

Git LFS Details

SHA256: 67f2dbebde17501a2cba5480f9a343bf5e7f4f19b9338294ad5abf365644b18d
Pointer size: 130 Bytes
Size of remote file: 58 kB

samples/unet_736x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: c8d9b96ae0ba2fa8e11b3f63c5349bdc4dc935b680d8d82ed6b3ff53ca0f68ad
Pointer size: 131 Bytes
Size of remote file: 291 kB

Git LFS Details

SHA256: 8cef2fd1a9e3e88275d5b50203363dd7c2597685c38925c2dd8a724f3ed8f336
Pointer size: 131 Bytes
Size of remote file: 145 kB

samples/unet_768x384_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 462da6dc64c7adf2e3e5cc81cb1a66c565227eb75300cf909e94f43ac0769f19
Pointer size: 131 Bytes
Size of remote file: 131 kB

Git LFS Details

SHA256: 51b744f0f283116e0075438058bf8b8dded3073a4ba421cad0b5714e2f7ca6b4
Pointer size: 131 Bytes
Size of remote file: 108 kB

samples/unet_768x416_0.jpg CHANGED Viewed

Git LFS Details

SHA256: f59d227ebfe9b97882f43bd9004986f0421669b5c040741c63dc334331e8e957
Pointer size: 131 Bytes
Size of remote file: 120 kB

Git LFS Details

SHA256: c5807b488c79c08e59e9e688d0251c31d08afc6b647a16e8d00950fab383e010
Pointer size: 131 Bytes
Size of remote file: 222 kB

samples/unet_768x448_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 62d9f8ef5145961a162bdfbc13fad8eae4315273284b424ad968eb0a5a47853f
Pointer size: 130 Bytes
Size of remote file: 69.4 kB

Git LFS Details

SHA256: 3a290912047df81418830e8c4751a137d83eceb6cb166b891232c7104042bca2
Pointer size: 131 Bytes
Size of remote file: 290 kB

samples/unet_768x480_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 21798c05de2bbf01562c0fded8fac4a57b53e7184752b4726117c23f1595f7a1
Pointer size: 131 Bytes
Size of remote file: 117 kB

Git LFS Details

SHA256: bd14296827d834152cfa1f34cfd6777f0e92a913d0303cfeaa366ddb0343b7e5
Pointer size: 130 Bytes
Size of remote file: 93.7 kB

samples/unet_768x512_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 92cfc3f6be72a5522dc3b779ad7f9a33895581ed7ace92741ef9c17120d6e09f
Pointer size: 131 Bytes
Size of remote file: 209 kB

Git LFS Details

SHA256: c91616c0a479bfa9f65daf214c14d4c3e3e71da3ee40421fd1f94dcd54f14fca
Pointer size: 131 Bytes
Size of remote file: 137 kB

samples/unet_768x544_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 504024913e4adba965d6311fe720e9858049a73cd7cdea86d42946141fa874a0
Pointer size: 131 Bytes
Size of remote file: 111 kB

Git LFS Details

SHA256: d23b568d53914113d809af715fae0553fddb640005177bf7056d621989164963
Pointer size: 131 Bytes
Size of remote file: 287 kB

samples/unet_768x576_0.jpg CHANGED Viewed

Git LFS Details

SHA256: aed8bad3d5dffe8b815307ed2cbb1a09c0d6f2743dd3f9561bbdf6836be73479
Pointer size: 130 Bytes
Size of remote file: 87.2 kB

Git LFS Details

SHA256: c9aade580228951a2f579ae945dc741325c9def4d6664773b87169c26243632a
Pointer size: 131 Bytes
Size of remote file: 181 kB

samples/unet_768x608_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 89a72ace5e6b792796355b33fdf9723d1051cf44ad66b7ab43427cae4d7f9217
Pointer size: 131 Bytes
Size of remote file: 116 kB

Git LFS Details

SHA256: 8b6e59dc42e9724a5add440123b52df094e50ace69b249d60fe80ad16a439c13
Pointer size: 131 Bytes
Size of remote file: 205 kB

samples/unet_768x640_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 7e2f17bed43d61f9caa4f1b8bfead6f7a1f6080b9fab02a5b6b1ba1f15f8081b
Pointer size: 131 Bytes
Size of remote file: 129 kB

Git LFS Details

SHA256: 8a11a7108fd4c1166c6e192a025272e16754f82eff3247826e1239608b213207
Pointer size: 131 Bytes
Size of remote file: 135 kB

samples/unet_768x672_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 099bacddc5b78ce3210747f54b71c76d3c8c306bda53206ca24e8958816700d8
Pointer size: 131 Bytes
Size of remote file: 111 kB

Git LFS Details

SHA256: 8ba7f152e248bc52b31878f1b4feebfeef6d4e6c29d2870e05ea91317e78c270
Pointer size: 131 Bytes
Size of remote file: 101 kB

samples/unet_768x704_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 303e7fdd78bad56ad65770f8a014fa93f13c6f11308b1373762ae3aa5484851b
Pointer size: 131 Bytes
Size of remote file: 113 kB

Git LFS Details

SHA256: a0d45417100cbabcde417c120b2fe783ed459192f74b18a3dce6eff4ae16ea85
Pointer size: 131 Bytes
Size of remote file: 103 kB

samples/unet_768x736_0.jpg CHANGED Viewed

Git LFS Details

SHA256: c5fbd0c5b303fdc3a32cd3468ee31e4e7687f5f6c3692fc5dc036873c99c847a
Pointer size: 130 Bytes
Size of remote file: 93 kB

Git LFS Details

SHA256: 5f84e77b8bb41a8398df9848eac67e357c152eb17088c64d360c087c3fd2d10e
Pointer size: 131 Bytes
Size of remote file: 165 kB

samples/unet_768x768_0.jpg CHANGED Viewed

Git LFS Details

SHA256: 9dac5599ccff3699bfe3ac12e35255243c6b102fa81fb04eb317bcf38eb7a696
Pointer size: 130 Bytes
Size of remote file: 36.9 kB

Git LFS Details

SHA256: a0cebfeefc239cd2f3254f9601b78a6b482e02aabe801868619b719e6e9046c7
Pointer size: 131 Bytes
Size of remote file: 204 kB

test.ipynb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e89d6273e5bed930830e22ad4c43a8ac63a0737fa1eefacb085e8b13dd014f3
-size 7281307

 version https://git-lfs.github.com/spec/v1
+oid sha256:b39ff79167169c2330f233ed5c60a768b255e0268c4abf4ae930d9fa884c6547
+size 1730113

train.py CHANGED Viewed

@@ -28,7 +28,7 @@ from transformers import AutoTokenizer, AutoModel
 # --------------------------- Параметры ---------------------------
 ds_path = "/workspace/sdxs/datasets/768"
 project = "unet"
-batch_size = 384
 base_learning_rate = 3e-5
 min_learning_rate = 2.5e-5
 num_epochs = 10
@@ -48,9 +48,9 @@ comet_ml_api_key = "Agctp26mbqnoYrrlvQuKSTk6r"
 comet_ml_workspace = "recoilme"
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.allow_tf32 = True
-torch.backends.cuda.enable_mem_efficient_sdp(False)
 dtype = torch.float32
-save_barrier = 1.004
 warmup_percent = 0.01
 percentile_clipping = 98
 betta2 = 0.998
@@ -58,7 +58,7 @@ eps = 1e-6
 clip_grad_norm = 1.0
 limit = 0
 checkpoints_folder = ""
-mixed_precision = "bf16"
 gradient_accumulation_steps = 1
 accelerator = Accelerator(
@@ -68,7 +68,7 @@ accelerator = Accelerator(
 device = accelerator.device
 # Параметры для диффузии
-n_diffusion_steps = 50
 samples_to_generate = 12
 guidance_scale = 4
@@ -307,6 +307,7 @@ def collate_fn_simple(batch):
     raw_texts = [item["text"] for item in batch]
     texts = [
         "" if t.lower().startswith("zero")
         else t[1:].lstrip() if t.startswith(".")
         else t
         for t in raw_texts

 # --------------------------- Параметры ---------------------------
 ds_path = "/workspace/sdxs/datasets/768"
 project = "unet"
+batch_size = 256
 base_learning_rate = 3e-5
 min_learning_rate = 2.5e-5
 num_epochs = 10
 comet_ml_workspace = "recoilme"
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.allow_tf32 = True
+#torch.backends.cuda.enable_mem_efficient_sdp(False)
 dtype = torch.float32
+save_barrier = 1.005
 warmup_percent = 0.01
 percentile_clipping = 98
 betta2 = 0.998
 clip_grad_norm = 1.0
 limit = 0
 checkpoints_folder = ""
+mixed_precision = "no"
 gradient_accumulation_steps = 1
 accelerator = Accelerator(
 device = accelerator.device
 # Параметры для диффузии
+n_diffusion_steps = 40
 samples_to_generate = 12
 guidance_scale = 4
     raw_texts = [item["text"] for item in batch]
     texts = [
         "" if t.lower().startswith("zero")
+        else "" if random.random() < 0.05
         else t[1:].lstrip() if t.startswith(".")
         else t
         for t in raw_texts

unet/config.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ab7222cdd538ff5178adc870a764d22ab24a185f0a7b63852ea728b3b09fcff
-size 1876

 version https://git-lfs.github.com/spec/v1
+oid sha256:987ce2ea59106bf0806b16c21fff90d6fe6e7a90101e0157d8295803dbf34f2b
+size 1892

unet/diffusion_pytorch_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cd724b5b286e32c2179c9003d1ce5bc63f5de9c8410353ea0ee3e6d3306b535
-size 6604736640

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3b7b2f8f401c5b9536cc3f629d898ed43e71f05c65aab9e48215003b8c618fd
+size 3302474272