Text-to-Image
diffusion
safety
dose-response
File size: 32,180 Bytes
7874a24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
# Denoiser architecture settings.
# The same key/value set appears again under `model.denoiser_config` in this
# file; keep the two in sync if either is edited by hand.
diffusion_model:
  _model_class: PRX
  in_channels: 3
  patch_size: 32
  context_in_dim: 2304
  hidden_size: 1792
  mlp_ratio: 3.5
  num_heads: 28
  depth: 16
  # 32 + 32 = 64, which equals hidden_size / num_heads (1792 / 28).
  # NOTE(review): presumably per-axis positional-embedding widths that must
  # sum to the head dimension - confirm against the PRX model code.
  axes_dim: [32, 32]
  theta: 10000
  time_factor: 1000.0
  time_max_period: 10000
  conditioning_block_ids: null
  bottleneck_size: 256
# Text-encoder settings.
# The same key/value set appears again under `model.text_tower_config`.
# `preset_name` and `prompt_max_tokens` match the `text_tower` and
# `prompt_max_tokens` values used by both entries under `dataset`.
diffusion_text_tower:
  preset_name: t5gemma2b-256-bf16
  model_name: google/t5gemma-2b-2b-ul2
  prompt_max_tokens: 256
  use_attn_mask: true
  use_last_hidden_state: true
  only_tokenizer: false
  torch_dtype: torch.bfloat16
  unpadded: false
# VAE settings.
# The same key/value set appears again under `model.vae_config`.
# NOTE(review): `IdentityVAE` with `default_channels: 3` suggests the denoiser
# works directly on RGB pixels rather than latents - confirm in the pipeline.
diffusion_vae:
  model_name: identity
  model_class: IdentityVAE
  default_channels: 3
  torch_dtype: torch.bfloat16
# Noise-scheduler settings.
# The same key/value set appears again under `model.scheduler_config`.
diffusion_scheduler:
  prediction_type: x_prediction_flow_matching
  num_train_timesteps: 1000
  timestep_shift: 3.0
# The denoiser dtype differs from the text tower and VAE, which both use
# `torch.bfloat16`; `trainer.precision` is `amp_bf16`.
# NOTE(review): presumably fp32 weights under bf16 autocast - confirm.
denoiser_dtype: torch.float
# Optimizer factory configuration.
# Two hyper-parameter groups are supplied: `muon_config` and `adam_config`.
# Both use the same learning rate (0.0001) and zero weight decay.
# NOTE(review): presumably parameters whose name matches `muon_name_filter`
# ("blocks") are handled by Muon and the rest by Adam - confirm in
# `create_muon_optimizer`.
optimizer:
  _target_: prx.training.optimizer.create_muon_optimizer
  _recursive_: false
  muon_name_filter: blocks

  muon_config:
    lr: 0.0001
    momentum: 0.95
    nesterov: true
    ns_steps: 5
    rms_scale: true
    weight_decay: 0.0

  adam_config:
    lr: 0.0001
    betas: [0.9, 0.95]
    eps: 1.0e-08
    weight_decay: 0.0
# Streaming datasets for training and evaluation.
# NOTE(review): `eval_dataset` reads the same two local shard directories as
# `train_dataset` and differs only in `shuffle_seed` - confirm that evaluating
# on the training shards is intended rather than a held-out split.
dataset:
  train_dataset:
    _target_: prx.dataset.StreamingProcessedDataset
    # Two local streams: `safe_full` and `unsafe_c2`.
    local:
    - /checkpoint/dream/felixfriedrich/diffusion_safety/dose_response/mds/safe_full
    - /checkpoint/dream/felixfriedrich/diffusion_safety/dose_response/mds/unsafe_c2
    # A single [caption key, number] pair.
    # NOTE(review): 1.0 is presumably a sampling weight - confirm.
    caption_keys:
    - [prompt, 1.0]
    # Matches `diffusion_text_tower.preset_name` / `prompt_max_tokens`.
    text_tower: t5gemma2b-256-bf16
    prompt_max_tokens: 256

    cache_limit: 8tb
    download_timeout: 7200
    drop_last: true
    shuffle: true
    batching_method: device_per_stream

    # Data-loader worker settings.
    prefetch_factor: 2
    num_workers: 8
    persistent_workers: true
    pin_memory: true

    has_mask_text_latents: false
    has_text_latents: false

    # `default_image_size` and `patch_size_pixels` match the top-level
    # `image_size` (512) and `patch_size_pixels` (32).
    transforms:
    - _target_: prx.dataset.transforms.ArAwareResize
      default_image_size: 512
      patch_size_pixels: 32
    transforms_targets: [image]
    shuffle_seed: 478163327

  eval_dataset:
    _target_: prx.dataset.StreamingProcessedDataset
    # Same two local streams as `train_dataset`.
    local:
    - /checkpoint/dream/felixfriedrich/diffusion_safety/dose_response/mds/safe_full
    - /checkpoint/dream/felixfriedrich/diffusion_safety/dose_response/mds/unsafe_c2
    caption_keys:
    - [prompt, 1.0]
    text_tower: t5gemma2b-256-bf16
    prompt_max_tokens: 256

    cache_limit: 8tb
    download_timeout: 7200
    drop_last: true
    shuffle: true
    batching_method: device_per_stream

    prefetch_factor: 2
    num_workers: 8
    persistent_workers: true
    pin_memory: true

    has_mask_text_latents: false
    has_text_latents: false

    transforms:
    - _target_: prx.dataset.transforms.ArAwareResize
      default_image_size: 512
      patch_size_pixels: 32
    transforms_targets: [image]
    # The only value that differs from `train_dataset`.
    shuffle_seed: 107420369
# Training callbacks, keyed by name; every entry carries a `_target_` path.
callbacks:
  # Target class is `prx.callbacks.LogDiffusionImages`; its `_target_`,
  # `size`, `guidance_scale` and `seed` keys follow the prompt list.
  # NOTE(review): presumably renders the prompts below as sample images during
  # training - confirm in the callback implementation.
  image_monitor:
    # Fixed list of text prompts. These are runtime strings: rewording or
    # reordering them changes what the callback receives.
    prompt:
    - A curious tabby cat perched on a moss-covered log in a lush, misty forest at
      dawn. Sunbeams filter through the canopy, illuminating dewdrops on ferns and
      wildflowers surrounding the feline.
    - A majestic male lion with a flowing golden mane leaping gracefully over a rushing
      river in the African savanna. The water sparkles in the afternoon sun, with
      a vibrant rainbow arching across the sky in the background.
    - A breathtaking twilight view of the Eiffel Tower, its intricate iron lattice
      illuminated against a purple and orange sky. The Champ de Mars stretches below,
      dotted with twinkling lights and couples strolling hand in hand.
    - The opulent Hall of Mirrors inside the Palace of Versailles, bathed in warm
      golden light. Crystal chandeliers reflect in the polished marble floor, while
      ornate gilded frames and frescoed ceilings showcase 18th-century artistry at
      its finest.
    - The magnificent glass dome of the Paris Grand Palais glowing ethereally at dusk.
      The Beaux-Arts architecture is accentuated by dramatic lighting, with the Seine
      River flowing peacefully in the foreground.
    - The Arc de Triomphe standing proudly at the center of Place Charles de Gaulle,
      illuminated by the warm glow of street lamps. Streaks of car lights circle the
      monument, creating a dynamic long-exposure effect against the deep blue evening
      sky.
    - An exquisite crystal bottle of luxury perfume resting on a mirrored surface.
      Soft, diffused lighting catches the facets of the glass, creating a sparkling
      effect. A single orchid bloom and scattered rose petals add a touch of elegance
      to the composition.
    - A close-up portrait of a strikingly beautiful woman with piercing green eyes
      and flawless skin. Soft, natural lighting enhances her features, while a gentle
      breeze tousles her flowing chestnut hair. Her expression is both mysterious
      and alluring.
    - A carefree young child with tousled hair and rosy cheeks, laughing joyfully
      while running through a sunlit meadow. Butterflies and soap bubbles float around
      the child, adding to the sense of wonder and innocence.
    - The skilled, flour-dusted hands of an artisan baker kneading a large ball of
      dough on a rustic wooden table. Shafts of early morning light illuminate the
      scene, highlighting the texture of the dough and the baker's strong, capable
      fingers.
    - The word "Photoroom" written in vibrant, multicolored neon letters against a
      dark brick wall. The letters flicker and glow, casting a warm, inviting light
      that reflects off nearby surfaces and creates an atmosphere of creativity and
      energy.
    - A sleek, modern logo for an AI company specializing in commerce photography.
      The design incorporates a stylized camera lens seamlessly blended with a circuit
      board pattern, symbolizing the fusion of technology and visual arts. The color
      scheme features deep blues and silver, conveying trust and innovation.
    - Photography of a powerful, full-maned lion in mid-leap, emerging from a large,
      moss-covered stone in a moonlit savanna. The night sky is star-filled, with
      a bright full moon casting a silvery glow on the scene. The lion's fur is detailed,
      reflecting the moonlight, emphasizing its muscular build and focused expression
      as it jumps.
    - Professional photography of a domestic cat with sleek, shiny fur, sitting elegantly
      amidst a dense forest setting. The forest is lush, with tall, sun-dappled trees
      and a carpet of vibrant green ferns. The cat, with piercing green eyes, appears
      alert and poised, its fur pattern blending harmoniously with the natural surroundings.
    - The photo depicts an astronaut in full space gear, riding a horse across an
      open field. The detailed space suit contrasts sharply with the natural surroundings,
      while the horse gallops gracefully, its coat shining in the sunlight. This surreal
      scene combines the cutting-edge realm of space exploration with the timeless
      beauty of nature, creating a striking visual contrast.
    - Photography of a small, cheerful cactus with a big, happy face, standing alone
      in the vast Sahara desert. The cactus has bright green spikes and is wearing
      a tiny sombrero. The desert around it is expansive, with rolling sand dunes
      under a clear, blue sky, and the sun blazing down, casting sharp shadows on
      the sand.
    - Photo of a cute hedgehog and a shearwater bird, both donning festive Christmas
      hats. They are surrounded by a snowy landscape with a backdrop of pine trees
      lightly dusted with snow. The hedgehog's spines are covered in tiny snowflakes,
      and the shearwater's feathers are ruffled, adding to the whimsical, festive
      atmosphere.
    - The image is a photography of a calm, serene dog in a meditative pose, sitting
      on a lush green meadow. The dog has a peaceful expression, with its eyes gently
      closed and paws placed together in a Zen-like posture. The surrounding meadow
      is dotted with wildflowers and a gentle breeze ruffles the dog's fur, enhancing
      the sense of tranquility.
    - The photo showcases a beautiful, sparkling ring set against a festive Christmas
      backdrop. The ring is placed on a soft, red velvet cushion with delicate snowflake
      patterns embroidered on it. Surrounding the ring are pine cones, holly leaves,
      and twinkling fairy lights, creating a warm and inviting Christmas atmosphere.
    - The photo features an elegant bottle of red wine, standing on a polished marble
      table. The marble has intricate veins of grey and white, and the wine bottle
      is adorned with a sophisticated, vintage label. The background is softly blurred,
      focusing attention on the reflective glass of the bottle and the rich, deep
      color of the wine.
    - Photography of a bustling city street at dusk. Neon signs illuminate the scene,
      reflecting off the wet pavement. People are walking briskly, some holding umbrellas.
      Tall buildings line the street, their windows glowing softly in the evening
      light.
    - Design photography of a scene set in a cozy mountain cabin. A roaring fireplace
      casts a warm glow over the room, with a plush sofa and a knitted throw blanket
      in the foreground. Through the window, snow-covered trees and a starry night
      sky can be seen.
    - A photo of a tranquil beach at sunrise. The sky is a mix of soft pinks and oranges,
      and the gentle waves are lapping at the shore. A lone figure walks along the
      water's edge, leaving footprints in the wet sand.
    - The photography captures a snowy city park at night. Street lamps cast a soft
      glow on the snow-covered paths and benches. Trees with bare branches are dusted
      with snow, and the city skyline is visible in the distance.
    - An old, cobblestone street in a European city. Colorful buildings with flower
      boxes in the windows line the street. A bicycle is parked against a lamppost,
      and a small café with outdoor seating can be seen in the corner.
    - A photo of a spacious modern kitchen. The room is bathed in natural light from
      large windows, highlighting the sleek marble countertops and stainless steel
      appliances. A large island sits in the center, adorned with fresh fruits and
      flowers.
    - An image of a serene Japanese garden. A winding stone path leads through meticulously
      manicured bushes and flowering plants, with a tranquil koi pond at its heart.
      Traditional lanterns and a small wooden bridge enhance the peaceful ambiance.
    - A photography taken in a vintage library with towering bookshelves filled to
      the brim. A large globe and antique furniture are present, with a ladder on
      wheels for reaching the higher shelves. Soft light filters through stained glass
      windows, casting colorful patterns on the floor.
    - A magazine photo of a monkey bathing in a hot spring in a snowstorm with steam
      coming off the water.
    - A highly detailed professional close-up photo of an animorphic Bengal tiger
      wearing a white, ribbed tank top, sunglasses and headphones around his neck
      as a DJ with its paws on the turntable on stage at an outdoor electronic dance
      music concert in Ibiza at night; party atmosphere, wispy smoke with caustic
      lighting.
    - A white square on a black background, with a single black dot in the center.
      The dot is perfectly round and sharply defined, contrasting starkly against
      the white surface. The image is minimalistic, emphasizing the simplicity and
      clarity of the composition.
    - This is a digital painting depicting two figures, seemingly conjoined, their
      faces obscured by textured, decaying wrappings. The style is dark, surreal,
      and evocative of gothic horror. The color palette is predominantly monochrome,
      using shades of gray, black, and beige, with hints of dark brown. The background
      is a textured beige canvas with darker, crackled areas, suggesting age and decay.
      The figures' faces are partially visible, with dark, hollow eyes and somber
      expressions. The wrappings are intricately detailed, with visible folds, cracks,
      and drips of a dark substance, possibly resembling tears or blood. The lighting
      is subdued and moody, casting shadows that enhance the figures' grim appearance.
      The overall atmosphere is one of sorrow, mystery, and unease. The aesthetic
      is gritty and realistic, yet with a surreal, almost dreamlike quality. The vibe
      is dark, melancholic, and thought-provoking. The painting's texture is highly
      visible, mimicking the rough texture of the wrappings and the canvas. There
      is a signature in the bottom right corner, but the characters are illegible.
      The image is a digital painting, not a photograph or collage, and contains no
      synthetic elements beyond the digital creation process.
    - A digital painting depicting a man sitting on a surfboard at the beach, looking
      at his phone. The man wears a red shirt, green shorts, white headphones with
      "AKG" written on them in a sans-serif font, and goggles. A woman is seen in
      the background, partially submerged in the water. The ocean is a vibrant turquoise,
      with white foamy waves. The sky is a clear, bright blue. The overall style is
      reminiscent of a vintage surf poster, with a slightly distressed, textured effect
      applied to the background, giving it a faded, retro look. The lighting is bright
      and sunny, creating a warm, summery atmosphere. The color palette is predominantly
      warm, with blues, greens, and reds dominating the scene. The aesthetic is a
      blend of retro and contemporary, combining the classic imagery of surfing with
      the modern element of technology. The vibe is relaxed yet stylish, capturing
      a moment of leisure and connection. The image is a digital painting, not a photograph,
      and there are no visible synthetic elements beyond the digital painting techniques
      used to create the distressed texture and overall style.
    - A photograph depicting the interior of a vintage bus at night. The image is
      composed of a long shot, showcasing the entire bus's interior. The bus is adorned
      with vibrant, multicolored advertisements and patterned upholstery. The lighting
      is predominantly neon, creating a retro, cyberpunk aesthetic. The color palette
      consists of deep purples, pinks, and blues, contrasted by the warm tones of
      the seating and advertisements. The atmosphere is moody and atmospheric, with
      a sense of quiet solitude. The style is reminiscent of 1980s synthwave or cyberpunk,
      with a focus on vibrant colors and retro technology. The overall vibe is nostalgic
      and futuristic. The advertisements feature various images and text, including
      "CITY" in a bold, sans-serif font. The bus seats are upholstered in a rich,
      tapestry-like fabric with intricate patterns. The screens display various advertisements
      and images. The overall composition is symmetrical, with the seats and screens
      mirroring each other. There are no apparent synthetic elements in the image.
      The image is sharp and well-lit, with a focus on detail and texture.
    - This is a digital painting or graphic, not a photograph. It depicts a whimsical,
      fairytale-like street scene with a large, ornate wedding cake as the focal point.
      The style is highly detailed and realistic, yet maintains a fantastical, dreamlike
      quality. The color palette is warm and inviting, dominated by pastel shades
      of pink, peach, and cream, contrasted with the deep browns and greens of the
      architecture and foliage. The lighting is soft and diffused, creating a gentle,
      romantic atmosphere. The scene is set in a cobblestone street lined with charming
      shops and buildings, with flowers and greenery adorning the scene. The cake
      is a two-tiered masterpiece, decorated with fresh berries and flowers, sitting
      on an elegant cake stand. Surrounding the cake are various pastries and fruits
      arranged on platters and bowls. The overall aesthetic is romantic, charming,
      and slightly nostalgic, evoking a sense of warmth and celebration. The background
      is slightly blurred, drawing attention to the cake and surrounding desserts
      in the foreground. There is no text in the image. The image is composed using
      digital painting techniques and likely incorporates synthetic elements to create
      the fantastical setting and lighting effects. The vibe is cheerful, celebratory,
      and romantic.
    - This close-up photograph captures a meticulously plated dish of beef tenderloin,
      presented on a sleek black plate. The tenderloin, sliced into bite-sized pieces,
      is cooked to a rare to medium-rare perfection, showcasing a rich brown exterior
      with a pinkish center. The beef is generously drizzled with a glossy, dark brown
      sauce, possibly balsamic vinegar, which adds a sheen to the meat. Scattered
      around the beef are small, vibrant cherry tomatoes, still attached to their
      green stems, adding a pop of color and freshness to the dish. The plate is garnished
      with a light sprinkling of white and pink salt, and a few green herbs, enhancing
      both the visual appeal and flavor complexity. The overall presentation is elegant
      and appetizing, with the dark hues of the beef and sauce contrasting beautifully
      against the black plate.
    - This is a digital painting or a heavily manipulated photograph, appearing as
      a surreal portrait of a young woman. The composition is a close-up, focusing
      on the face. The woman's face is partially obscured by fragmented, cracked,
      light teal and off-white pieces resembling peeling paint or decaying skin. These
      fragments are irregularly shaped and layered, creating a sense of depth and
      texture. The woman's skin is subtly illuminated, with a warm, golden light highlighting
      her features, particularly her lips and eyes. Her eyes are a striking light
      blue, contrasting with the cool tones of the fragmented elements. The overall
      color palette is muted, with teal, beige, and golden hues dominating. The atmosphere
      is melancholic and mysterious, with a hint of ethereal beauty. The style is
      surreal and painterly, blending realistic portraiture with abstract elements.
      The vibe is introspective and unsettling, suggesting themes of vulnerability,
      fragility, and hidden identity. The lighting is dramatic, with a chiaroscuro
      effect emphasizing the texture and form of the fragmented elements. There is
      no text in the image.
    - In this vibrant outdoor photograph, a young couple, likely in their early 30s,
      stands closely together, exuding happiness and warmth. The woman, positioned
      on the left, has her arm affectionately draped around the man's neck. Both are
      beaming with broad smiles, revealing their teeth. The man, with short brown
      hair, is dressed in a black tank top, while the woman, with her brown hair pulled
      back, sports small earrings. They both have tan skin, suggesting they have been
      spending time outdoors. Behind them, a surfboard leans against a wall, hinting
      at a beach setting. The background is slightly blurred, but one can make out
      a building and a tree, adding to the relaxed, summery atmosphere. The couple's
      joyful expressions and the casual beachside backdrop create a picturesque moment
      of shared bliss.
    - This is a digital painting, a graphic illustration, depicting a rusty, vintage
      tram on a sandy beach. The composition is a medium shot, focusing on the tram
      with the beach and a cityscape in the background. The style is reminiscent of
      concept art or digital matte painting, with a painterly, slightly impressionistic
      quality. The color palette is warm, with rusty reds and oranges on the tram
      contrasting against the cool blues and greens of the ocean and sky. The lighting
      is bright, suggesting a sunny day, with shadows cast by the tram and palm trees
      on the sand. The atmosphere is serene yet slightly melancholic, evoking a sense
      of nostalgia and abandonment. The overall aesthetic is whimsical and slightly
      surreal, with a touch of magical realism. The vibe is peaceful and contemplative.
      The sky is a vibrant blue with fluffy white clouds. The ocean is a turquoise
      color with gentle waves. The city in the distance is a hazy silhouette. The
      palm trees are lush and green. The tram is heavily weathered, with peeling paint
      and graffiti. The tracks are rusty and worn. The sand is light beige, with shadows
      from the tram and vegetation. There is no text in the image.
    - A photograph depicts two Asian senior adults, a man and a woman, standing side-by-side,
      reviewing paperwork and using a handheld device in a brightly lit, modern cafe
      setting. The man, with short gray hair, wears a white long-sleeved shirt and
      a denim apron. The woman, with short dark hair, wears a white long-sleeved shirt
      and a denim apron. They are both smiling and appear to be collaborating. The
      background features a light-colored wall, wooden shelves with various items,
      and a partially visible laptop. The overall atmosphere is warm, friendly, and
      professional. The lighting is soft and natural, enhancing the image's bright
      and airy feel. The color palette is muted, with soft whites, grays, and blues
      dominating. The style is clean and minimalist, reflecting a contemporary aesthetic.
      The vibe is calm, collaborative, and business-oriented.
    - A photograph depicts a rustic Christmas scene. A blurred golden reindeer stands
      in the background, out of focus. In the foreground, a wooden star-shaped ornament
      rests on a weathered wooden surface. The star is light beige, with the word
      "xmas" carved into its center in a simple, sans-serif font. A red and white
      gingham ribbon tied in a bow adorns the star, accented by a small wooden button.
      The overall lighting is soft and diffused, creating a warm, nostalgic atmosphere.
      The color palette is muted, with earthy tones and soft reds. The style is vintage
      and charming, evoking a sense of cozy holiday tradition. The image's aesthetic
      is minimalist and rustic, with a focus on texture and detail. The vibe is calm,
      peaceful, and heartwarming.
    - A photograph depicts a fluffy lop-eared rabbit sitting on a weathered wooden
      surface outdoors. The rabbit is predominantly white with patches of light brown
      and tan fur, particularly on its head and ears. Its ears droop noticeably, and
      its fur appears soft and thick. The rabbit's eyes are dark and expressive. It
      is positioned slightly off-center, facing towards the left of the frame. Behind
      the rabbit, slightly out of focus, is a miniature dark red metal wheelbarrow.
      A partially visible orange apple sits to the left of the rabbit. Fallen autumn
      leaves, predominantly reddish-brown, are scattered around the rabbit and apple
      on the wooden surface. The background is a blurred but visible expanse of green
      grass, suggesting an outdoor setting. The lighting is soft and natural, likely
      diffused daylight, casting no harsh shadows. The overall atmosphere is calm,
      peaceful, and autumnal. The aesthetic is rustic and charming, with a focus on
      the rabbit as the main subject. The color palette is muted and natural, consisting
      mainly of whites, browns, oranges, and greens. The style is naturalistic and
      straightforward, without any overt artistic manipulation. The vibe is gentle
      and heartwarming.
    - The image showcases a white and brown rabbit with droopy ears, sitting on a
      wooden surface. Behind the rabbit, there's a miniature cart with a wheel. Adjacent
      to the cart, there's an orange apple and some dried autumn leaves scattered
      around. The backdrop consists of a blurred green field, suggesting an outdoor
      setting during the fall season.
    - A photograph depicts a young woman with dark brown hair styled in a loose braid,
      wearing a floral headband and a flowing, pale pink and purple floral dress.
      She sits on a plush, dark reddish-brown velvet couch draped with purple velvet
      fabric. The background is a vibrant, retro-style wallpaper with large orange
      and pink floral patterns on a dark brown base. The woman's hands rest gently
      on a dark-colored pillow with a large floral print featuring pink and white
      roses. The lighting is soft and diffused, creating a warm and intimate atmosphere.
      The overall aesthetic is bohemian and romantic, with a vintage 70s vibe. The
      colours are rich and saturated, with a focus on warm tones. The composition
      is a close-up shot, focusing on the woman and her surroundings. The image has
      a dreamy, slightly melancholic mood.
    - A young woman with a braided hairstyle and a golden headband is seated against
      a vibrant orange-red wallpaper with floral patterns. She wears a sleeveless
      dress adorned with floral prints and is draped in a deep purple fabric. She
      holds a floral-patterned pillow close to her and appears to be in a contemplative
      mood.
    - A photograph depicts a young woman with long brown hair wearing a floral dress
      and beaded jewelry, standing in front of a vibrant red autumnal backdrop. The
      woman is gently holding and examining dark berries from a vine. The dress is
      black with red floral patterns, adorned with red and black beaded embellishments
      on the sleeves and neckline. Her hair is styled with a red floral crown. The
      background is a wall of red leaves, creating a striking contrast with the woman's
      dark dress. The lighting is natural, with sunlight illuminating the scene, casting
      a warm glow on the woman and the leaves. The overall aesthetic is romantic,
      autumnal, and slightly mystical. The atmosphere is serene and peaceful. The
      style is reminiscent of folk art or fairytale imagery. The vibe is dreamy and
      evocative of autumnal beauty.
    - A woman with dark hair and a floral headpiece stands amidst a backdrop of vibrant
      red leaves. She wears a dress adorned with red and black patterns, and her fingers
      delicately hold a cluster of red berries. The sunlight filters through, casting
      a warm glow on her face and the surrounding foliage.
    - 'A photograph depicts a mason jar filled with vibrant red tomato juice, garnished
      with a sprig of fresh celery, sitting on a rustic wooden cutting board. The
      background features blurred but visible ingredients: ripe red tomatoes, a red
      bell pepper, yellow bell peppers, and fresh basil leaves, all arranged on a
      wooden surface. The lighting is soft and natural, creating a warm and inviting
      atmosphere. The overall aesthetic is rustic, wholesome, and healthy, with a
      focus on natural food photography. The colours are rich and saturated, with
      the red of the tomatoes and juice being the dominant hue, complemented by the
      greens of the herbs and the yellows of the peppers. The style is simple and
      straightforward, emphasizing the natural beauty of the ingredients. The vibe
      is relaxed, comforting, and appealing to those interested in healthy eating
      and fresh produce. There is no text in the image.'
    - The image showcases a rustic wooden table setting with a glass jar filled with
      a vibrant red juice or smoothie. The jar is adorned with fresh green parsley
      leaves. Surrounding the jar are various fresh ingredients, including tomatoes,
      bell peppers, and basil leaves. The backdrop is a wooden wall, adding to the
      rustic ambiance.
    # Remaining `image_monitor` keys; they are siblings of `prompt`.
    # `size` equals the top-level `image_size` (512), and `guidance_scale`
    # equals the single entry in `generation_metrics.guidance_scales` (3.5).
    _target_: prx.callbacks.LogDiffusionImages
    size: 512
    guidance_scale: 3.5
    seed: 42
  # Monitoring callbacks whose targets all live under `composer.callbacks`.
  # Only `speed_monitor` sets an option (`window_size`); the rest are
  # instantiated with no arguments.
  speed_monitor:
    _target_: composer.callbacks.speed_monitor.SpeedMonitor
    window_size: 10
  lr_monitor:
    _target_: composer.callbacks.lr_monitor.LRMonitor
  memory_monitor:
    _target_: composer.callbacks.memory_monitor.MemoryMonitor
  runtime_estimator:
    _target_: composer.callbacks.runtime_estimator.RuntimeEstimator
  optimizer_monitor:
    _target_: composer.callbacks.OptimizerMonitor
  nan_monitor:
    _target_: composer.callbacks.NaNMonitor
  # Generation-quality metrics callback.
  # `frequency` is the same value as `trainer.save_interval` (10_000ba).
  # NOTE(review): presumably metrics are computed on up to `max_samples`
  # generated images per guidance scale - confirm in `LogQualityMetrics`.
  generation_metrics:
    _target_: prx.callbacks.LogQualityMetrics
    frequency: 10_000ba
    guidance_scales: [3.5]
    seed: 42
    num_inference_steps: 50
    # All three metric families are switched on.
    compute_fid: true
    compute_cmmd: true
    compute_dino_mmd: true
    max_samples: 10000
# Run identity; the same three values are repeated under `logger.wandb`.
project: PRX
group: dose-response-full
name: C2
nccl_sleep: 1
activation_memory_budget: 1
# `image_size` and `patch_size_pixels` match `model.input_size` and the
# `ArAwareResize` transform settings under `dataset`.
image_size: 512
patch_size_pixels: 32
# NOTE(review): 256 / 32 = 8; how that splits between devices and gradient
# accumulation depends on the launch setup - confirm.
global_batch_size: 256
# Also set, with the same value, under `trainer`.
device_train_microbatch_size: 32
device_eval_microbatch_size: 16
# Same seed as `trainer.seed`.
seed: 42
eval_first: false
compile_denoiser: true
compile_vae: true
# Training algorithms, keyed by name; every entry carries a `_target_` path.
# `gradient_clipping` comes from Composer, the others from `prx.algorithm`.
algorithms:
  # Clips by norm with a threshold of 0.2.
  gradient_clipping:
    _target_: composer.algorithms.GradientClipping
    clipping_type: norm
    clipping_threshold: 0.2
  # NOTE(review): `route_start` / `route_end` presumably index denoiser blocks
  # (the model has `depth: 16`) - confirm in the Tread implementation.
  tread:
    _target_: prx.algorithm.tread.Tread
    route_start: 2
    route_end: 12
    routing_probability: 0.5
    detach: false
    seed: 42
    train_only: true
    self_guidance: true
  # NOTE(review): `layer_index` presumably selects the denoiser layer aligned
  # with the `dinov3_vitl16` encoder features - confirm.
  repa:
    _target_: prx.algorithm.repa.REPA
    lambda_weight: 0.5
    layer_index: 7
    encoder: dinov3_vitl16
    compile_encoder: true
  # LPIPS term with weight 0.1 on a VGG backbone.
  lpips:
    _target_: prx.algorithm.lpips.LPIPS
    lpips_weight: 0.1
    lpips_net: vgg
    t_threshold: 1
    resize_factor: 0.5
  # DINO-based perceptual term with weight 0.01; shares `t_threshold: 1`
  # with `lpips`.
  pdino:
    _target_: prx.algorithm.perceptual_dino.PerceptualDINO
    pdino_weight: 0.01
    encoder: dinov2_vitb14_reg
    t_threshold: 1
    resize_resolution: 224
  # EMA of weights, starting at batch 0 with an update interval of 10ba.
  ema:
    _target_: prx.algorithm.ema.EMA
    smoothing: 0.999
    update_interval: 10ba
    ema_start: 0ba
# Pipeline factory configuration.
# The four `*_config` sub-mappings carry the same values as the top-level
# `diffusion_model`, `diffusion_text_tower`, `diffusion_vae` and
# `diffusion_scheduler` keys; keep them in sync if either copy is edited.
model:
  _target_: prx.pipeline.models_factory.build_pipeline

  denoiser_config:
    _model_class: PRX
    in_channels: 3
    patch_size: 32
    context_in_dim: 2304
    hidden_size: 1792
    mlp_ratio: 3.5
    num_heads: 28
    depth: 16
    # 32 + 32 = 64, which equals hidden_size / num_heads (1792 / 28).
    axes_dim: [32, 32]
    theta: 10000
    time_factor: 1000.0
    time_max_period: 10000
    conditioning_block_ids: null
    bottleneck_size: 256

  text_tower_config:
    preset_name: t5gemma2b-256-bf16
    model_name: google/t5gemma-2b-2b-ul2
    prompt_max_tokens: 256
    use_attn_mask: true
    use_last_hidden_state: true
    only_tokenizer: false
    torch_dtype: torch.bfloat16
    unpadded: false

  vae_config:
    model_name: identity
    model_class: IdentityVAE
    default_channels: 3
    torch_dtype: torch.bfloat16

  scheduler_config:
    prediction_type: x_prediction_flow_matching
    num_train_timesteps: 1000
    timestep_shift: 3.0

  # Matches the top-level `image_size`.
  input_size: 512
  # NOTE(review): presumably the probability of dropping the caption during
  # training (for classifier-free guidance) - confirm in `build_pipeline`.
  p_drop_caption: 0.1
  val_metrics:
  - _target_: torchmetrics.MeanSquaredError
  val_guidance_scales: []
  # Three contiguous [low, high] ranges that together span 0.0 to 1.0.
  # NOTE(review): presumably timestep buckets for per-bin loss logging -
  # confirm.
  loss_bins:
  - [0.0, 0.3]
  - [0.3, 0.6]
  - [0.6, 1.0]
# Learning-rate schedule: 1000-batch warmup, then a single milestone.
# NOTE(review): the milestone `1e9ep` is far beyond `trainer.max_duration`
# (100_000ba), so the rate presumably stays constant after warmup - confirm.
scheduler:
  _target_: composer.optim.MultiStepWithWarmupScheduler
  t_warmup: 1000ba
  milestones: [1e9ep]
# Experiment loggers, keyed by name.
logger:
  # `project`, `group` and `name` repeat the top-level keys of the same names.
  # Note that `trainer.run_name` is a different string (`dose-response-C2-full`).
  wandb:
    _target_: composer.loggers.WandBLogger
    project: PRX
    group: dose-response-full
    name: C2
# Composer `Trainer` arguments.
trainer:
  _target_: composer.Trainer
  device: gpu
  max_duration: 100_000ba
  # NOTE(review): `eval_interval: 0` presumably disables periodic evaluation
  # (the top-level `eval_first` is also false) - confirm against the Composer
  # Trainer documentation.
  eval_interval: 0
  eval_subset_num_batches: 64
  # Same value as the top-level `device_train_microbatch_size`.
  device_train_microbatch_size: 32
  run_name: dose-response-C2-full
  # Same value as the top-level `seed`.
  seed: 42
  scale_schedule_ratio: 1.0
  # Checkpointing: weights only, every 10_000ba, one checkpoint retained,
  # overwriting allowed.
  save_folder: /checkpoint/dream/felixfriedrich/diffusion_safety/dose_response/checkpoints_full/C2/phase1
  save_interval: 10_000ba
  save_num_checkpoints_to_keep: 1
  save_overwrite: true
  save_weights_only: true
  # Glob patterns for the VAE and text-tower state entries to leave out.
  save_ignore_keys:
  - state/model/vae*
  - state/model/text_tower*
  autoresume: false
  precision: amp_bf16
  dist_timeout: 7200.0
  parallelism_config:
    fsdp:
      reshard_after_forward: false
      device_mesh: mesh_2d
      use_orig_params: true