Text-to-Image
diffusion
safety
dose-response
dose-response-c0 / config.yaml
felfri's picture
Upload config.yaml with huggingface_hub
e99216b verified
# Denoiser architecture (model class PRX). The same key/value set is repeated
# under `denoiser_config` further down in this file.
# NOTE(review): this copy has lost its nesting indentation; the keys below
# presumably sit under `diffusion_model` — confirm against the original file.
diffusion_model:
_model_class: PRX
# 3 input channels, matching `default_channels: 3` of the identity VAE section.
in_channels: 3
# Same value as `patch_size_pixels: 32` used by the dataset transforms.
patch_size: 32
# presumably the embedding width delivered by the text tower — TODO confirm
context_in_dim: 2304
hidden_size: 1792
mlp_ratio: 3.5
# 1792 / 28 = 64 per head, which equals the sum of axes_dim (32 + 32).
num_heads: 28
depth: 16
axes_dim:
- 32
- 32
theta: 10000
time_factor: 1000.0
time_max_period: 10000
conditioning_block_ids: null
bottleneck_size: 256
# Text encoder ("text tower") settings. The same values are repeated under
# `text_tower_config` further down in this file.
diffusion_text_tower:
# The same preset name is referenced by `text_tower` in both dataset sections.
preset_name: t5gemma2b-256-bf16
model_name: google/t5gemma-2b-2b-ul2
# Same limit as `prompt_max_tokens: 256` in the dataset sections.
prompt_max_tokens: 256
use_attn_mask: true
use_last_hidden_state: true
# false here — presumably the full encoder is loaded, not just the tokenizer;
# consistent with `has_text_latents: false` in the datasets. TODO confirm.
only_tokenizer: false
torch_dtype: torch.bfloat16
unpadded: false
# VAE settings. The class is IdentityVAE with 3 default channels, which matches
# `in_channels: 3` of the denoiser. The same values are repeated under
# `vae_config` further down in this file.
# NOTE(review): an identity VAE presumably means the denoiser works directly on
# pixels rather than on encoded latents — confirm against the pipeline code.
diffusion_vae:
model_name: identity
model_class: IdentityVAE
default_channels: 3
torch_dtype: torch.bfloat16
# Noise-scheduler settings. The three scheduler keys are repeated under
# `scheduler_config` further down in this file.
diffusion_scheduler:
prediction_type: x_prediction_flow_matching
num_train_timesteps: 1000
timestep_shift: 3.0
# NOTE(review): nesting indentation is missing in this copy, so it is unclear
# whether `denoiser_dtype` belongs to `diffusion_scheduler` or is a top-level
# key; `scheduler_config` later in the file does not repeat it.
denoiser_dtype: torch.float
# Optimizer factory: builds the optimizer via
# prx.training.optimizer.create_muon_optimizer from two parameter-group
# configs (`muon_config` and `adam_config`).
optimizer:
_target_: prx.training.optimizer.create_muon_optimizer
# Hydra flag; presumably set so the nested configs reach the factory as plain
# config objects instead of being instantiated first — TODO confirm.
_recursive_: false
# presumably selects parameters whose names contain "blocks" for the Muon
# group, with the remainder handled by the Adam group — TODO confirm.
muon_name_filter: blocks
muon_config:
# Both parameter groups use the same learning rate (0.0001) and no weight decay.
lr: 0.0001
momentum: 0.95
nesterov: true
ns_steps: 5
rms_scale: true
weight_decay: 0.0
adam_config:
lr: 0.0001
betas:
- 0.9
- 0.95
eps: 1.0e-08
weight_decay: 0.0
# Dataset definitions. `train_dataset` and `eval_dataset` are both
# prx.dataset.StreamingProcessedDataset instances; in this file they differ
# only in `shuffle_seed`.
dataset:
train_dataset:
_target_: prx.dataset.StreamingProcessedDataset
# Absolute cluster path to the shards; not portable to other machines.
local:
- /checkpoint/dream/felixfriedrich/diffusion_safety/dose_response/mds/safe_full
# One nested pair: the key name "prompt" and the number 1.0 — presumably a
# (caption key, sampling weight) pair; TODO confirm against the dataset class.
caption_keys:
- - prompt
- 1.0
# Must name the same preset as `preset_name` in the text tower sections.
text_tower: t5gemma2b-256-bf16
prompt_max_tokens: 256
cache_limit: 8tb
# presumably seconds — TODO confirm
download_timeout: 7200
drop_last: true
shuffle: true
batching_method: device_per_stream
prefetch_factor: 2
num_workers: 8
persistent_workers: true
pin_memory: true
# Both false: the shards presumably carry raw prompts rather than precomputed
# text-encoder outputs — TODO confirm.
has_mask_text_latents: false
has_text_latents: false
transforms:
- _target_: prx.dataset.transforms.ArAwareResize
# 512 matches the top-level `image_size`; 32 matches the denoiser `patch_size`.
default_image_size: 512
patch_size_pixels: 32
# NOTE(review): with indentation lost it is unclear whether
# `transforms_targets` is a sibling of `transforms` or part of the transform
# entry above — confirm against the original file.
transforms_targets:
- image
shuffle_seed: 478163327
# Evaluation dataset. Apart from `shuffle_seed`, every value below is identical
# to `train_dataset`.
# NOTE(review): this reads the same shard directory as training and also has
# `shuffle: true`, so it is not a held-out split; the trainer section sets
# `eval_interval: 0`. Confirm this is intentional.
eval_dataset:
_target_: prx.dataset.StreamingProcessedDataset
local:
- /checkpoint/dream/felixfriedrich/diffusion_safety/dose_response/mds/safe_full
# One nested pair: the key name "prompt" and the number 1.0 — presumably a
# (caption key, sampling weight) pair; TODO confirm against the dataset class.
caption_keys:
- - prompt
- 1.0
text_tower: t5gemma2b-256-bf16
prompt_max_tokens: 256
cache_limit: 8tb
download_timeout: 7200
drop_last: true
shuffle: true
batching_method: device_per_stream
prefetch_factor: 2
num_workers: 8
persistent_workers: true
pin_memory: true
has_mask_text_latents: false
has_text_latents: false
transforms:
- _target_: prx.dataset.transforms.ArAwareResize
default_image_size: 512
patch_size_pixels: 32
transforms_targets:
- image
# The only value that differs from `train_dataset` (478163327 there).
shuffle_seed: 107420369
# Training callbacks. `image_monitor` comes first: the long list that follows
# `prompt:` holds the text prompts handed to that callback; its `_target_`
# (prx.callbacks.LogDiffusionImages) and sampling settings appear after the
# list. The prompts are literal runtime strings — do not reword or re-wrap them
# casually, since that changes what the model is asked to generate.
callbacks:
image_monitor:
prompt:
- A curious tabby cat perched on a moss-covered log in a lush, misty forest at
dawn. Sunbeams filter through the canopy, illuminating dewdrops on ferns and
wildflowers surrounding the feline.
- A majestic male lion with a flowing golden mane leaping gracefully over a rushing
river in the African savanna. The water sparkles in the afternoon sun, with
a vibrant rainbow arching across the sky in the background.
- A breathtaking twilight view of the Eiffel Tower, its intricate iron lattice
illuminated against a purple and orange sky. The Champ de Mars stretches below,
dotted with twinkling lights and couples strolling hand in hand.
- The opulent Hall of Mirrors inside the Palace of Versailles, bathed in warm
golden light. Crystal chandeliers reflect in the polished marble floor, while
ornate gilded frames and frescoed ceilings showcase 18th-century artistry at
its finest.
- The magnificent glass dome of the Paris Grand Palais glowing ethereally at dusk.
The Beaux-Arts architecture is accentuated by dramatic lighting, with the Seine
River flowing peacefully in the foreground.
- The Arc de Triomphe standing proudly at the center of Place Charles de Gaulle,
illuminated by the warm glow of street lamps. Streaks of car lights circle the
monument, creating a dynamic long-exposure effect against the deep blue evening
sky.
- An exquisite crystal bottle of luxury perfume resting on a mirrored surface.
Soft, diffused lighting catches the facets of the glass, creating a sparkling
effect. A single orchid bloom and scattered rose petals add a touch of elegance
to the composition.
- A close-up portrait of a strikingly beautiful woman with piercing green eyes
and flawless skin. Soft, natural lighting enhances her features, while a gentle
breeze tousles her flowing chestnut hair. Her expression is both mysterious
and alluring.
- A carefree young child with tousled hair and rosy cheeks, laughing joyfully
while running through a sunlit meadow. Butterflies and soap bubbles float around
the child, adding to the sense of wonder and innocence.
- The skilled, flour-dusted hands of an artisan baker kneading a large ball of
dough on a rustic wooden table. Shafts of early morning light illuminate the
scene, highlighting the texture of the dough and the baker's strong, capable
fingers.
- The word "Photoroom" written in vibrant, multicolored neon letters against a
dark brick wall. The letters flicker and glow, casting a warm, inviting light
that reflects off nearby surfaces and creates an atmosphere of creativity and
energy.
- A sleek, modern logo for an AI company specializing in commerce photography.
The design incorporates a stylized camera lens seamlessly blended with a circuit
board pattern, symbolizing the fusion of technology and visual arts. The color
scheme features deep blues and silver, conveying trust and innovation.
- Photography of a powerful, full-maned lion in mid-leap, emerging from a large,
moss-covered stone in a moonlit savanna. The night sky is star-filled, with
a bright full moon casting a silvery glow on the scene. The lion's fur is detailed,
reflecting the moonlight, emphasizing its muscular build and focused expression
as it jumps.
- Professional photography of a domestic cat with sleek, shiny fur, sitting elegantly
amidst a dense forest setting. The forest is lush, with tall, sun-dappled trees
and a carpet of vibrant green ferns. The cat, with piercing green eyes, appears
alert and poised, its fur pattern blending harmoniously with the natural surroundings.
- The photo depicts an astronaut in full space gear, riding a horse across an
open field. The detailed space suit contrasts sharply with the natural surroundings,
while the horse gallops gracefully, its coat shining in the sunlight. This surreal
scene combines the cutting-edge realm of space exploration with the timeless
beauty of nature, creating a striking visual contrast.
- Photography of a small, cheerful cactus with a big, happy face, standing alone
in the vast Sahara desert. The cactus has bright green spikes and is wearing
a tiny sombrero. The desert around it is expansive, with rolling sand dunes
under a clear, blue sky, and the sun blazing down, casting sharp shadows on
the sand.
- Photo of a cute hedgehog and a shearwater bird, both donning festive Christmas
hats. They are surrounded by a snowy landscape with a backdrop of pine trees
lightly dusted with snow. The hedgehog's spines are covered in tiny snowflakes,
and the shearwater's feathers are ruffled, adding to the whimsical, festive
atmosphere.
- The image is a photography of a calm, serene dog in a meditative pose, sitting
on a lush green meadow. The dog has a peaceful expression, with its eyes gently
closed and paws placed together in a Zen-like posture. The surrounding meadow
is dotted with wildflowers and a gentle breeze ruffles the dog's fur, enhancing
the sense of tranquility.
- The photo showcases a beautiful, sparkling ring set against a festive Christmas
backdrop. The ring is placed on a soft, red velvet cushion with delicate snowflake
patterns embroidered on it. Surrounding the ring are pine cones, holly leaves,
and twinkling fairy lights, creating a warm and inviting Christmas atmosphere.
- The photo features an elegant bottle of red wine, standing on a polished marble
table. The marble has intricate veins of grey and white, and the wine bottle
is adorned with a sophisticated, vintage label. The background is softly blurred,
focusing attention on the reflective glass of the bottle and the rich, deep
color of the wine.
- Photography of a bustling city street at dusk. Neon signs illuminate the scene,
reflecting off the wet pavement. People are walking briskly, some holding umbrellas.
Tall buildings line the street, their windows glowing softly in the evening
light.
- Design photography of a scene set in a cozy mountain cabin. A roaring fireplace
casts a warm glow over the room, with a plush sofa and a knitted throw blanket
in the foreground. Through the window, snow-covered trees and a starry night
sky can be seen.
- A photo of a tranquil beach at sunrise. The sky is a mix of soft pinks and oranges,
and the gentle waves are lapping at the shore. A lone figure walks along the
water's edge, leaving footprints in the wet sand.
- The photography captures a snowy city park at night. Street lamps cast a soft
glow on the snow-covered paths and benches. Trees with bare branches are dusted
with snow, and the city skyline is visible in the distance.
- An old, cobblestone street in a European city. Colorful buildings with flower
boxes in the windows line the street. A bicycle is parked against a lamppost,
and a small café with outdoor seating can be seen in the corner.
- A photo of a spacious modern kitchen. The room is bathed in natural light from
large windows, highlighting the sleek marble countertops and stainless steel
appliances. A large island sits in the center, adorned with fresh fruits and
flowers.
- An image of a serene Japanese garden. A winding stone path leads through meticulously
manicured bushes and flowering plants, with a tranquil koi pond at its heart.
Traditional lanterns and a small wooden bridge enhance the peaceful ambiance.
- A photography taken in a vintage library with towering bookshelves filled to
the brim. A large globe and antique furniture are present, with a ladder on
wheels for reaching the higher shelves. Soft light filters through stained glass
windows, casting colorful patterns on the floor.
- A magazine photo of a monkey bathing in a hot spring in a snowstorm with steam
coming off the water.
- A highly detailed professional close-up photo of an animorphic Bengal tiger
wearing a white, ribbed tank top, sunglasses and headphones around his neck
as a DJ with its paws on the turntable on stage at an outdoor electronic dance
music concert in Ibiza at night; party atmosphere, wispy smoke with caustic
lighting.
- A white square on a black background, with a single black dot in the center.
The dot is perfectly round and sharply defined, contrasting starkly against
the white surface. The image is minimalistic, emphasizing the simplicity and
clarity of the composition.
- This is a digital painting depicting two figures, seemingly conjoined, their
faces obscured by textured, decaying wrappings. The style is dark, surreal,
and evocative of gothic horror. The color palette is predominantly monochrome,
using shades of gray, black, and beige, with hints of dark brown. The background
is a textured beige canvas with darker, crackled areas, suggesting age and decay.
The figures' faces are partially visible, with dark, hollow eyes and somber
expressions. The wrappings are intricately detailed, with visible folds, cracks,
and drips of a dark substance, possibly resembling tears or blood. The lighting
is subdued and moody, casting shadows that enhance the figures' grim appearance.
The overall atmosphere is one of sorrow, mystery, and unease. The aesthetic
is gritty and realistic, yet with a surreal, almost dreamlike quality. The vibe
is dark, melancholic, and thought-provoking. The painting's texture is highly
visible, mimicking the rough texture of the wrappings and the canvas. There
is a signature in the bottom right corner, but the characters are illegible.
The image is a digital painting, not a photograph or collage, and contains no
synthetic elements beyond the digital creation process.
- A digital painting depicting a man sitting on a surfboard at the beach, looking
at his phone. The man wears a red shirt, green shorts, white headphones with
"AKG" written on them in a sans-serif font, and goggles. A woman is seen in
the background, partially submerged in the water. The ocean is a vibrant turquoise,
with white foamy waves. The sky is a clear, bright blue. The overall style is
reminiscent of a vintage surf poster, with a slightly distressed, textured effect
applied to the background, giving it a faded, retro look. The lighting is bright
and sunny, creating a warm, summery atmosphere. The color palette is predominantly
warm, with blues, greens, and reds dominating the scene. The aesthetic is a
blend of retro and contemporary, combining the classic imagery of surfing with
the modern element of technology. The vibe is relaxed yet stylish, capturing
a moment of leisure and connection. The image is a digital painting, not a photograph,
and there are no visible synthetic elements beyond the digital painting techniques
used to create the distressed texture and overall style.
- A photograph depicting the interior of a vintage bus at night. The image is
composed of a long shot, showcasing the entire bus's interior. The bus is adorned
with vibrant, multicolored advertisements and patterned upholstery. The lighting
is predominantly neon, creating a retro, cyberpunk aesthetic. The color palette
consists of deep purples, pinks, and blues, contrasted by the warm tones of
the seating and advertisements. The atmosphere is moody and atmospheric, with
a sense of quiet solitude. The style is reminiscent of 1980s synthwave or cyberpunk,
with a focus on vibrant colors and retro technology. The overall vibe is nostalgic
and futuristic. The advertisements feature various images and text, including
"CITY" in a bold, sans-serif font. The bus seats are upholstered in a rich,
tapestry-like fabric with intricate patterns. The screens display various advertisements
and images. The overall composition is symmetrical, with the seats and screens
mirroring each other. There are no apparent synthetic elements in the image.
The image is sharp and well-lit, with a focus on detail and texture.
- This is a digital painting or graphic, not a photograph. It depicts a whimsical,
fairytale-like street scene with a large, ornate wedding cake as the focal point.
The style is highly detailed and realistic, yet maintains a fantastical, dreamlike
quality. The color palette is warm and inviting, dominated by pastel shades
of pink, peach, and cream, contrasted with the deep browns and greens of the
architecture and foliage. The lighting is soft and diffused, creating a gentle,
romantic atmosphere. The scene is set in a cobblestone street lined with charming
shops and buildings, with flowers and greenery adorning the scene. The cake
is a two-tiered masterpiece, decorated with fresh berries and flowers, sitting
on an elegant cake stand. Surrounding the cake are various pastries and fruits
arranged on platters and bowls. The overall aesthetic is romantic, charming,
and slightly nostalgic, evoking a sense of warmth and celebration. The background
is slightly blurred, drawing attention to the cake and surrounding desserts
in the foreground. There is no text in the image. The image is composed using
digital painting techniques and likely incorporates synthetic elements to create
the fantastical setting and lighting effects. The vibe is cheerful, celebratory,
and romantic.
- This close-up photograph captures a meticulously plated dish of beef tenderloin,
presented on a sleek black plate. The tenderloin, sliced into bite-sized pieces,
is cooked to a rare to medium-rare perfection, showcasing a rich brown exterior
with a pinkish center. The beef is generously drizzled with a glossy, dark brown
sauce, possibly balsamic vinegar, which adds a sheen to the meat. Scattered
around the beef are small, vibrant cherry tomatoes, still attached to their
green stems, adding a pop of color and freshness to the dish. The plate is garnished
with a light sprinkling of white and pink salt, and a few green herbs, enhancing
both the visual appeal and flavor complexity. The overall presentation is elegant
and appetizing, with the dark hues of the beef and sauce contrasting beautifully
against the black plate.
- This is a digital painting or a heavily manipulated photograph, appearing as
a surreal portrait of a young woman. The composition is a close-up, focusing
on the face. The woman's face is partially obscured by fragmented, cracked,
light teal and off-white pieces resembling peeling paint or decaying skin. These
fragments are irregularly shaped and layered, creating a sense of depth and
texture. The woman's skin is subtly illuminated, with a warm, golden light highlighting
her features, particularly her lips and eyes. Her eyes are a striking light
blue, contrasting with the cool tones of the fragmented elements. The overall
color palette is muted, with teal, beige, and golden hues dominating. The atmosphere
is melancholic and mysterious, with a hint of ethereal beauty. The style is
surreal and painterly, blending realistic portraiture with abstract elements.
The vibe is introspective and unsettling, suggesting themes of vulnerability,
fragility, and hidden identity. The lighting is dramatic, with a chiaroscuro
effect emphasizing the texture and form of the fragmented elements. There is
no text in the image.
- In this vibrant outdoor photograph, a young couple, likely in their early 30s,
stands closely together, exuding happiness and warmth. The woman, positioned
on the left, has her arm affectionately draped around the man's neck. Both are
beaming with broad smiles, revealing their teeth. The man, with short brown
hair, is dressed in a black tank top, while the woman, with her brown hair pulled
back, sports small earrings. They both have tan skin, suggesting they have been
spending time outdoors. Behind them, a surfboard leans against a wall, hinting
at a beach setting. The background is slightly blurred, but one can make out
a building and a tree, adding to the relaxed, summery atmosphere. The couple's
joyful expressions and the casual beachside backdrop create a picturesque moment
of shared bliss.
- This is a digital painting, a graphic illustration, depicting a rusty, vintage
tram on a sandy beach. The composition is a medium shot, focusing on the tram
with the beach and a cityscape in the background. The style is reminiscent of
concept art or digital matte painting, with a painterly, slightly impressionistic
quality. The color palette is warm, with rusty reds and oranges on the tram
contrasting against the cool blues and greens of the ocean and sky. The lighting
is bright, suggesting a sunny day, with shadows cast by the tram and palm trees
on the sand. The atmosphere is serene yet slightly melancholic, evoking a sense
of nostalgia and abandonment. The overall aesthetic is whimsical and slightly
surreal, with a touch of magical realism. The vibe is peaceful and contemplative.
The sky is a vibrant blue with fluffy white clouds. The ocean is a turquoise
color with gentle waves. The city in the distance is a hazy silhouette. The
palm trees are lush and green. The tram is heavily weathered, with peeling paint
and graffiti. The tracks are rusty and worn. The sand is light beige, with shadows
from the tram and vegetation. There is no text in the image.
- A photograph depicts two Asian senior adults, a man and a woman, standing side-by-side,
reviewing paperwork and using a handheld device in a brightly lit, modern cafe
setting. The man, with short gray hair, wears a white long-sleeved shirt and
a denim apron. The woman, with short dark hair, wears a white long-sleeved shirt
and a denim apron. They are both smiling and appear to be collaborating. The
background features a light-colored wall, wooden shelves with various items,
and a partially visible laptop. The overall atmosphere is warm, friendly, and
professional. The lighting is soft and natural, enhancing the image's bright
and airy feel. The color palette is muted, with soft whites, grays, and blues
dominating. The style is clean and minimalist, reflecting a contemporary aesthetic.
The vibe is calm, collaborative, and business-oriented.
- A photograph depicts a rustic Christmas scene. A blurred golden reindeer stands
in the background, out of focus. In the foreground, a wooden star-shaped ornament
rests on a weathered wooden surface. The star is light beige, with the word
"xmas" carved into its center in a simple, sans-serif font. A red and white
gingham ribbon tied in a bow adorns the star, accented by a small wooden button.
The overall lighting is soft and diffused, creating a warm, nostalgic atmosphere.
The color palette is muted, with earthy tones and soft reds. The style is vintage
and charming, evoking a sense of cozy holiday tradition. The image's aesthetic
is minimalist and rustic, with a focus on texture and detail. The vibe is calm,
peaceful, and heartwarming.
- A photograph depicts a fluffy lop-eared rabbit sitting on a weathered wooden
surface outdoors. The rabbit is predominantly white with patches of light brown
and tan fur, particularly on its head and ears. Its ears droop noticeably, and
its fur appears soft and thick. The rabbit's eyes are dark and expressive. It
is positioned slightly off-center, facing towards the left of the frame. Behind
the rabbit, slightly out of focus, is a miniature dark red metal wheelbarrow.
A partially visible orange apple sits to the left of the rabbit. Fallen autumn
leaves, predominantly reddish-brown, are scattered around the rabbit and apple
on the wooden surface. The background is a blurred but visible expanse of green
grass, suggesting an outdoor setting. The lighting is soft and natural, likely
diffused daylight, casting no harsh shadows. The overall atmosphere is calm,
peaceful, and autumnal. The aesthetic is rustic and charming, with a focus on
the rabbit as the main subject. The color palette is muted and natural, consisting
mainly of whites, browns, oranges, and greens. The style is naturalistic and
straightforward, without any overt artistic manipulation. The vibe is gentle
and heartwarming.
- The image showcases a white and brown rabbit with droopy ears, sitting on a
wooden surface. Behind the rabbit, there's a miniature cart with a wheel. Adjacent
to the cart, there's an orange apple and some dried autumn leaves scattered
around. The backdrop consists of a blurred green field, suggesting an outdoor
setting during the fall season.
- A photograph depicts a young woman with dark brown hair styled in a loose braid,
wearing a floral headband and a flowing, pale pink and purple floral dress.
She sits on a plush, dark reddish-brown velvet couch draped with purple velvet
fabric. The background is a vibrant, retro-style wallpaper with large orange
and pink floral patterns on a dark brown base. The woman's hands rest gently
on a dark-colored pillow with a large floral print featuring pink and white
roses. The lighting is soft and diffused, creating a warm and intimate atmosphere.
The overall aesthetic is bohemian and romantic, with a vintage 70s vibe. The
colours are rich and saturated, with a focus on warm tones. The composition
is a close-up shot, focusing on the woman and her surroundings. The image has
a dreamy, slightly melancholic mood.
- A young woman with a braided hairstyle and a golden headband is seated against
a vibrant orange-red wallpaper with floral patterns. She wears a sleeveless
dress adorned with floral prints and is draped in a deep purple fabric. She
holds a floral-patterned pillow close to her and appears to be in a contemplative
mood.
- A photograph depicts a young woman with long brown hair wearing a floral dress
and beaded jewelry, standing in front of a vibrant red autumnal backdrop. The
woman is gently holding and examining dark berries from a vine. The dress is
black with red floral patterns, adorned with red and black beaded embellishments
on the sleeves and neckline. Her hair is styled with a red floral crown. The
background is a wall of red leaves, creating a striking contrast with the woman's
dark dress. The lighting is natural, with sunlight illuminating the scene, casting
a warm glow on the woman and the leaves. The overall aesthetic is romantic,
autumnal, and slightly mystical. The atmosphere is serene and peaceful. The
style is reminiscent of folk art or fairytale imagery. The vibe is dreamy and
evocative of autumnal beauty.
- A woman with dark hair and a floral headpiece stands amidst a backdrop of vibrant
red leaves. She wears a dress adorned with red and black patterns, and her fingers
delicately hold a cluster of red berries. The sunlight filters through, casting
a warm glow on her face and the surrounding foliage.
- 'A photograph depicts a mason jar filled with vibrant red tomato juice, garnished
with a sprig of fresh celery, sitting on a rustic wooden cutting board. The
background features blurred but visible ingredients: ripe red tomatoes, a red
bell pepper, yellow bell peppers, and fresh basil leaves, all arranged on a
wooden surface. The lighting is soft and natural, creating a warm and inviting
atmosphere. The overall aesthetic is rustic, wholesome, and healthy, with a
focus on natural food photography. The colours are rich and saturated, with
the red of the tomatoes and juice being the dominant hue, complemented by the
greens of the herbs and the yellows of the peppers. The style is simple and
straightforward, emphasizing the natural beauty of the ingredients. The vibe
is relaxed, comforting, and appealing to those interested in healthy eating
and fresh produce. There is no text in the image.'
- The image showcases a rustic wooden table setting with a glass jar filled with
a vibrant red juice or smoothie. The jar is adorned with fresh green parsley
leaves. Surrounding the jar are various fresh ingredients, including tomatoes,
bell peppers, and basil leaves. The backdrop is a wooden wall, adding to the
rustic ambiance.
# Remaining `image_monitor` settings (the prompt list ends on the line above).
_target_: prx.callbacks.LogDiffusionImages
# 512 matches the top-level `image_size`.
size: 512
# Same guidance scale (3.5) and seed (42) as the `generation_metrics` callback.
guidance_scale: 3.5
seed: 42
# Stock Composer monitoring callbacks (speed, learning rate, memory, runtime
# estimate, optimizer statistics, NaN detection). Only `speed_monitor` takes an
# argument here; the rest are instantiated with their defaults.
speed_monitor:
_target_: composer.callbacks.speed_monitor.SpeedMonitor
window_size: 10
lr_monitor:
_target_: composer.callbacks.lr_monitor.LRMonitor
memory_monitor:
_target_: composer.callbacks.memory_monitor.MemoryMonitor
runtime_estimator:
_target_: composer.callbacks.runtime_estimator.RuntimeEstimator
optimizer_monitor:
_target_: composer.callbacks.OptimizerMonitor
nan_monitor:
_target_: composer.callbacks.NaNMonitor
# Generation-quality metrics callback (prx.callbacks.LogQualityMetrics) with
# FID, CMMD and DINO-MMD all enabled.
generation_metrics:
_target_: prx.callbacks.LogQualityMetrics
# Same interval as the trainer's `save_interval` (10_000ba); "ba" presumably
# denotes batches, following Composer time strings — TODO confirm.
frequency: 10_000ba
guidance_scales:
- 3.5
seed: 42
num_inference_steps: 50
compute_fid: true
compute_cmmd: true
compute_dino_mmd: true
max_samples: 10000
# Top-level run settings.
# `project`, `group` and `name` carry the same values as the wandb logger
# section further down.
project: PRX
group: dose-response-full
name: C0
nccl_sleep: 1
activation_memory_budget: 1
# 512 / 32 matches `default_image_size` / `patch_size_pixels` in the dataset
# transforms and `input_size` / `patch_size` in the model section.
image_size: 512
patch_size_pixels: 32
global_batch_size: 256
# Same value as `device_train_microbatch_size` in the trainer section.
device_train_microbatch_size: 32
device_eval_microbatch_size: 16
seed: 42
eval_first: false
compile_denoiser: true
compile_vae: true
# Training algorithms applied on top of the base loop. First entry: Composer's
# GradientClipping, clipping by norm at a threshold of 0.2.
algorithms:
gradient_clipping:
_target_: composer.algorithms.GradientClipping
clipping_type: norm
clipping_threshold: 0.2
# prx.algorithm.tread.Tread settings.
# NOTE(review): `route_start: 2` / `route_end: 12` presumably index denoiser
# blocks (the denoiser has `depth: 16`) and `routing_probability` the fraction
# of tokens routed around them — confirm against the Tread implementation.
tread:
_target_: prx.algorithm.tread.Tread
route_start: 2
route_end: 12
routing_probability: 0.5
detach: false
seed: 42
train_only: true
self_guidance: true
# prx.algorithm.repa.REPA settings, using a DINOv3 ViT-L/16 encoder.
# NOTE(review): `layer_index: 7` presumably selects the denoiser layer whose
# features are aligned with the encoder, weighted by `lambda_weight` — confirm.
repa:
_target_: prx.algorithm.repa.REPA
lambda_weight: 0.5
layer_index: 7
encoder: dinov3_vitl16
compile_encoder: true
# prx.algorithm.lpips.LPIPS settings: VGG backbone, weight 0.1.
# NOTE(review): `t_threshold: 1` presumably bounds the timesteps at which the
# loss is applied, and `resize_factor: 0.5` presumably downsamples images
# before the loss — confirm against the implementation.
lpips:
_target_: prx.algorithm.lpips.LPIPS
lpips_weight: 0.1
lpips_net: vgg
t_threshold: 1
resize_factor: 0.5
# prx.algorithm.perceptual_dino.PerceptualDINO settings: DINOv2 ViT-B/14
# (register variant) encoder, weight 0.01, inputs resized to 224.
# Uses the same `t_threshold: 1` as the lpips entry.
pdino:
_target_: prx.algorithm.perceptual_dino.PerceptualDINO
pdino_weight: 0.01
encoder: dinov2_vitb14_reg
t_threshold: 1
resize_resolution: 224
# prx.algorithm.ema.EMA settings: smoothing 0.999, updated every 10 "ba" and
# active from 0 "ba" ("ba" presumably denotes batches, following Composer time
# strings — TODO confirm).
ema:
_target_: prx.algorithm.ema.EMA
smoothing: 0.999
update_interval: 10ba
ema_start: 0ba
# Model pipeline, built by prx.pipeline.models_factory.build_pipeline.
# `denoiser_config` repeats the values of the `diffusion_model` section near
# the top of this file; keep the two in sync if either is edited by hand.
model:
_target_: prx.pipeline.models_factory.build_pipeline
denoiser_config:
_model_class: PRX
in_channels: 3
patch_size: 32
context_in_dim: 2304
hidden_size: 1792
mlp_ratio: 3.5
# 1792 / 28 = 64 per head, which equals the sum of axes_dim (32 + 32).
num_heads: 28
depth: 16
axes_dim:
- 32
- 32
theta: 10000
time_factor: 1000.0
time_max_period: 10000
conditioning_block_ids: null
bottleneck_size: 256
# Text tower settings for the pipeline; repeats the values of the
# `diffusion_text_tower` section near the top of this file.
text_tower_config:
preset_name: t5gemma2b-256-bf16
model_name: google/t5gemma-2b-2b-ul2
prompt_max_tokens: 256
use_attn_mask: true
use_last_hidden_state: true
only_tokenizer: false
torch_dtype: torch.bfloat16
unpadded: false
# VAE settings for the pipeline (IdentityVAE, 3 channels); repeats the values
# of the `diffusion_vae` section near the top of this file.
vae_config:
model_name: identity
model_class: IdentityVAE
default_channels: 3
torch_dtype: torch.bfloat16
# Noise-scheduler settings for the pipeline; repeats the first three keys of
# the `diffusion_scheduler` section near the top of this file. `denoiser_dtype`
# is not repeated here.
scheduler_config:
prediction_type: x_prediction_flow_matching
num_train_timesteps: 1000
timestep_shift: 3.0
# Remaining pipeline options.
# 512 matches the top-level `image_size`.
input_size: 512
# presumably the probability of dropping the caption for a training sample
# (used for classifier-free guidance) — TODO confirm.
p_drop_caption: 0.1
val_metrics:
- _target_: torchmetrics.MeanSquaredError
# Explicit empty list: no guidance scales are configured for validation.
val_guidance_scales: []
# Three [low, high] pairs covering 0.0-0.3, 0.3-0.6 and 0.6-1.0 — presumably
# timestep ranges over which the loss is reported separately; TODO confirm.
loss_bins:
- - 0.0
- 0.3
- - 0.3
- 0.6
- - 0.6
- 1.0
# Learning-rate schedule: Composer MultiStepWithWarmupScheduler with a
# 1000-"ba" warmup.
# NOTE(review): the single milestone is 1e9 "ep", while the trainer's
# `max_duration` is 100_000ba, so the step presumably never triggers and the
# schedule is effectively warmup followed by a constant rate — confirm intent.
scheduler:
_target_: composer.optim.MultiStepWithWarmupScheduler
t_warmup: 1000ba
milestones:
- 1e9ep
# Experiment logging through Composer's WandBLogger. `project`, `group` and
# `name` carry the same values as the top-level keys of the same names.
logger:
wandb:
_target_: composer.loggers.WandBLogger
project: PRX
group: dose-response-full
name: C0
# Composer Trainer arguments for phase 1 of the C0 run.
trainer:
_target_: composer.Trainer
device: gpu
max_duration: 100_000ba
# NOTE(review): 0 presumably disables periodic evaluation in Composer, which
# would leave `eval_subset_num_batches` without effect — confirm.
eval_interval: 0
eval_subset_num_batches: 64
# Same value as the top-level `device_train_microbatch_size`.
device_train_microbatch_size: 32
run_name: dose-response-C0-full-phase1
seed: 42
scale_schedule_ratio: 1.0
# Absolute cluster path; not portable to other machines.
save_folder: /checkpoint/dream/felixfriedrich/diffusion_safety/dose_response/checkpoints_full/C0/phase1
# Same interval as `frequency` of the generation_metrics callback.
save_interval: 10_000ba
# NOTE(review): only one checkpoint is kept, overwriting is allowed, only
# weights are saved and `autoresume` is false — together this suggests the run
# cannot be resumed with optimizer state after an interruption; confirm this
# is acceptable.
save_num_checkpoints_to_keep: 1
save_overwrite: true
save_weights_only: true
# Glob patterns naming the VAE and text tower state — presumably excluded from
# saved checkpoints; TODO confirm.
save_ignore_keys:
- state/model/vae*
- state/model/text_tower*
autoresume: false
precision: amp_bf16
# Same numeric value as the datasets' `download_timeout` (7200).
dist_timeout: 7200.0
parallelism_config:
fsdp:
reshard_after_forward: false
device_mesh: mesh_2d
use_orig_params: true