Upload 5 files

- README.md (+10, -10)
- beamInference.py (+4, -4)
- beam_diffusion.py (+20, -8)
- requirements.txt (+10, -74)
README.md (CHANGED)

````diff
@@ -27,8 +27,11 @@ python3 -m venv beam_env
 source beam_env/bin/activate  # On macOS/Linux
 # beam_env\Scripts\activate   # On Windows
 
+pip install huggingface_hub
+huggingface-cli download Gui28F/BeamDiffusion2 --include requirements.txt --local-dir .
+
 # 3. Install required dependencies
-pip install -r ./
+pip install -r ./requirements.txt
 ```
 ---
 ## 🚀 Quickstart Guide
@@ -37,26 +40,21 @@ Here's a basic example of how to use BeamDiffusion with the `transformers` libra
 
 ```python
 from huggingface_hub import snapshot_download
+
 # Download the model snapshot
 snapshot_download(repo_id="Gui28F/BeamDiffusion", local_dir="BeamDiffusionModel")
-from BeamDiffusionModel.beam_diffusion import BeamDiffusionPipeline, BeamDiffusionConfig,BeamDiffusionModel
+from BeamDiffusionModel.beam_diffusion import BeamDiffusionPipeline, BeamDiffusionConfig, BeamDiffusionModel
 
 # Initialize the configuration, model, and pipeline
-config = BeamDiffusionConfig()
+config = BeamDiffusionConfig(sd="SD-2.1", latents_idx=[0, 1, 2, 3], n_seeds=4, steps_back=2, beam_width=2, window_size=2, use_rand=True)
 model = BeamDiffusionModel(config)
 pipe = BeamDiffusionPipeline(model)
-
+
 # Define the input parameters
 input_data = {
     "steps": ["A lively outdoor celebration with guests gathered around, everyone excited to support the event.",
               "A chef in a cooking uniform raises one hand dramatically, signaling it's time to serve the food.",
               "Guests chat and laugh in a vibrant setting, with people gathered around tables, enjoying the event."],
-    "latents_idx": [0, 1, 2, 3],
-    "n_seeds": 4,
-    "steps_back": 2,
-    "beam_width": 2,
-    "window_size": 2,
-    "use_rand": True
 }
 
 # Generate the sequence of images
@@ -69,6 +67,8 @@ sequence_imgs = pipe(input_data)
 
 ## 🔍 Input Parameters Explained
 
+- **`sd`** (`str`): The base model to use for image generation. The available options are `SD-2.1` and `flux`.
+
 - **`steps`** (`list of strings`): Descriptions for each step in the image generation process. The model generates one image per step, forming a sequence that aligns with these descriptions.
 
 - **`latents_idx`** (`list of integers`): Indices referring to specific positions in the latent space to be used during image generation. This allows the model to leverage different latent representations for diverse outputs.
````
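For reference, the setup commands as they read after this change, taken from the new README lines (the `python3 -m venv beam_env` line comes from the hunk header context):

```bash
# Create and activate a virtual environment
python3 -m venv beam_env
source beam_env/bin/activate  # On macOS/Linux
# beam_env\Scripts\activate   # On Windows

# Fetch the pinned requirements from the Hub
pip install huggingface_hub
huggingface-cli download Gui28F/BeamDiffusion2 --include requirements.txt --local-dir .

# 3. Install required dependencies
pip install -r ./requirements.txt
```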
beamInference.py (CHANGED)

````diff
@@ -10,7 +10,7 @@ def set_softmax(nodes, softmax, n_latents, n_max_latents):
     for node, softmax_value in zip(nodes, softmax):
         node.set_softmax(softmax_value, n_latents, n_max_latents)
 
-def beam_inference(steps, latents_idx, n_seeds= 1, seeds=[], steps_back=2, beam_width=4, window_size=2, use_rand=True):
+def beam_inference(sd, steps, latents_idx, n_seeds= 1, seeds=[], steps_back=2, beam_width=4, window_size=2, use_rand=True):
     while len(seeds) < n_seeds:
         seeds.append(random.randint(0, 10**6))
     captions = steps
@@ -19,7 +19,7 @@ def beam_inference(steps, latents_idx, n_seeds= 1, seeds=[], steps_back=2, beam_
     for i, caption in enumerate(captions):
         if i == 0:
             for seed in seeds:
-                latents, img = gen_img(caption, seed=seed)
+                latents, img = gen_img(sd,caption, seed=seed)
                 new_node = tree.add_node(tree.root, caption, i + 1, "Rand Seed", "Rand Seed",
                                          img, latents, None)
                 nodes_to_explore.append(new_node)
@@ -30,7 +30,7 @@ def beam_inference(steps, latents_idx, n_seeds= 1, seeds=[], steps_back=2, beam_
             current_step_embeddings, current_image_embeddings = [], []
             if use_rand:
                 seed = random.randint(0, 10 ** 6)
-                latents, img = gen_img(caption, seed=seed)
+                latents, img = gen_img(sd,caption, seed=seed)
                 new_node = tree.add_node(parent_node, caption, i + 1, "Rand Seed", "Rand Seed",
                                          img, latents, None)
                 parent_childs.append(new_node)
@@ -43,7 +43,7 @@ def beam_inference(steps, latents_idx, n_seeds= 1, seeds=[], steps_back=2, beam_
             for ancestor_idx, ancestor in enumerate(ancestors):
                 for latent in latents_idx:
                     ancestor_latent = ancestor.get_latent(latent)
-                    latents, img = gen_img(caption, latent=ancestor_latent)
+                    latents, img = gen_img(sd,caption, latent=ancestor_latent)
                     new_node = tree.add_node(parent_node, caption, i + 1, ancestor.step, latent,img, latents, None)
                     parent_childs.append(new_node)
                     current_step_embedding, current_image_embedding = new_node.get_features()
````
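The net effect of this change is that `beam_inference` no longer assumes a single baked-in diffusion backend: the caller passes one in as a new first positional argument, and it is threaded through every `gen_img` call. A minimal usage sketch, assuming the backend classes construct without arguments as `beam_diffusion.py` does below:

```python
# Sketch only: shows the updated beam_inference call pattern.
# Assumes StableDiffusion() can be built standalone, as in beam_diffusion.py.
from BeamDiffusionModel.models.diffusionModel.StableDiffusion import StableDiffusion
from BeamDiffusionModel.beamInference import beam_inference

backend = StableDiffusion()  # object forwarded to every gen_img(sd, ...) call
images = beam_inference(
    backend,                 # new first positional argument
    steps=["A lively outdoor celebration with guests gathered around."],
    latents_idx=[0, 1, 2, 3],
    n_seeds=4,
)
```

Note also the pre-existing mutable default `seeds=[]`: seeds appended during one call persist into subsequent calls that rely on the default.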
beam_diffusion.py (CHANGED)

````diff
@@ -2,11 +2,16 @@ from transformers import PretrainedConfig, PreTrainedModel, Pipeline
 import torch
 
 from BeamDiffusionModel.beamInference import beam_inference
+from BeamDiffusionModel.models.diffusionModel.StableDiffusion import StableDiffusion
+from BeamDiffusionModel.models.diffusionModel.Flux import Flux
 # Your custom configuration for the BeamDiffusion model
 class BeamDiffusionConfig(PretrainedConfig):
     model_type = "beam_diffusion"
-    def __init__(self, latents_idx=None, n_seeds=4, seeds=None, steps_back=2, beam_width=4, window_size=2, use_rand=True, **kwargs):
+    def __init__(self, sd="SD-2.1",latents_idx=None, n_seeds=4, seeds=None, steps_back=2, beam_width=4, window_size=2, use_rand=True, **kwargs):
         super().__init__(**kwargs)
+        self.sd_name = sd
+        self.sd = None
+        self.get_model(sd)
         self.latents_idx = latents_idx if latents_idx else [0, 1, 2, 3]
         self.n_seeds = n_seeds
         self.seeds = seeds if seeds else []
@@ -15,6 +20,12 @@ class BeamDiffusionConfig(PretrainedConfig):
         self.window_size = window_size
         self.use_rand = use_rand
 
+    def get_model(self, sd):
+        if self.sd_name == "flux":
+            self.sd = Flux()
+        elif self.sd_name == "SD-2.1":
+            self.sd = StableDiffusion()
+
 import torch.nn as nn
 from huggingface_hub import ModelHubMixin
 # Custom BeamDiffusionModel that performs inference for each step
@@ -29,14 +40,15 @@ class BeamDiffusionModel(PreTrainedModel, ModelHubMixin):
 
     def forward(self, input_data):
         images = beam_inference(
+            self.config.sd,
             steps=input_data.get('steps', []),
-            latents_idx=
-            n_seeds=
-            seeds=
-            steps_back=
-            beam_width=
-            window_size=
-            use_rand=
+            latents_idx=self.config.latents_idx,
+            n_seeds=self.config.n_seeds,
+            seeds=self.config.seeds,
+            steps_back=self.config.steps_back,
+            beam_width=self.config.beam_width,
+            window_size=self.config.window_size,
+            use_rand=self.config.use_rand,
         )
         return {"images": images}
 
````
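With this change the backend is selected once, in the config, and `forward` simply passes `self.config.sd` through to `beam_inference`. A minimal selection sketch using only names from the diff above:

```python
# Sketch of backend selection via the new `sd` argument.
from BeamDiffusionModel.beam_diffusion import (
    BeamDiffusionPipeline,
    BeamDiffusionConfig,
    BeamDiffusionModel,
)

# "SD-2.1" (the default) selects StableDiffusion(); "flux" selects Flux()
config = BeamDiffusionConfig(sd="flux")
model = BeamDiffusionModel(config)
pipe = BeamDiffusionPipeline(model)
```

Note that `get_model` has no `else` branch: any other `sd` value leaves `self.sd` as `None`, so a typo in the backend name only surfaces later, inside `beam_inference`.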
requirements.txt (CHANGED)

````diff
@@ -1,75 +1,11 @@
-
-
-
-
-
-
-
-attrs==25.3.0
-certifi==2025.1.31
-charset-normalizer==3.4.1
-click==8.1.8
-diffusers==0.32.2
-docker-pycreds==0.4.0
-eval_type_backport==0.2.2
-filelock==3.18.0
-frozenlist==1.5.0
-fsspec==2025.3.2
-gitdb==4.0.12
-GitPython==3.1.44
-huggingface-hub==0.30.1
-idna==3.10
-importlib_metadata==8.6.1
-Jinja2==3.1.3
-lightning==2.5.1
-lightning-utilities==0.14.3
-MarkupSafe==2.1.5
-mpmath==1.3.0
-multidict==6.3.2
-networkx==3.2.1
-numpy==2.0.2
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.21.5
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-packaging==24.2
-pillow==11.0.0
-platformdirs==4.3.7
-propcache==0.3.1
-protobuf==5.29.4
-psutil==7.0.0
-pydantic==2.11.2
-pydantic_core==2.33.1
-pytorch-lightning==2.5.1
+diffusers==0.33.1
+huggingface_hub==0.29.1
+lightning==2.4.0
+numpy==2.2.6
+Pillow==11.2.1
+pytorch_lightning==2.4.0
+PyYAML==6.0.1
 PyYAML==6.0.2
-
-
-
-sentry-sdk==2.25.1
-setproctitle==1.3.5
-six==1.17.0
-smmap==5.0.2
-sympy==1.13.1
-tokenizers==0.21.1
-torch==2.6.0+cu126
-torchaudio==2.6.0+cu126
-torchmetrics==1.7.0
-torchvision==0.21.0+cu126
-tqdm==4.67.1
-transformers==4.51.0
-triton==3.2.0
-typing-inspection==0.4.0
-typing_extensions==4.13.1
-urllib3==2.3.0
-wandb==0.19.9
-yarl==1.19.0
-zipp==3.21.0
+torch==2.4.1
+transformers==4.49.0
+wandb==0.19.11
````
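One thing worth flagging in the slimmed-down file: it now pins PyYAML twice, the added `PyYAML==6.0.1` alongside the pre-existing `PyYAML==6.0.2`. pip normally rejects duplicate exact pins of the same package with a "Double requirement given" error, so the README's updated install flow, which consumes this file, would likely fail until one of the two pins is removed:

```bash
# Install flow from the updated README (see the README.md diff above).
pip install huggingface_hub
huggingface-cli download Gui28F/BeamDiffusion2 --include requirements.txt --local-dir .
pip install -r ./requirements.txt  # expected to error while both PyYAML pins are present
```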