Gui28F committed
Commit 90bb09b · verified · 1 parent: d7977ea

Upload 5 files

Files changed (4)
  1. README.md +10 -10
  2. beamInference.py +4 -4
  3. beam_diffusion.py +20 -8
  4. requirements.txt +10 -74
README.md CHANGED
@@ -27,8 +27,11 @@ python3 -m venv beam_env
 source beam_env/bin/activate # On macOS/Linux
 # beam_env\Scripts\activate # On Windows
 
+pip install huggingface_hub
+huggingface-cli download Gui28F/BeamDiffusion2 --include requirements.txt --local-dir .
+
 # 3. Install required dependencies
-pip install -r ./BeamDiffusionModel/requirements.txt
+pip install -r ./requirements.txt
 ```
 ---
 ## 🚀 Quickstart Guide
@@ -37,26 +40,21 @@ Here's a basic example of how to use BeamDiffusion with the `transformers` library
 
 ```python
 from huggingface_hub import snapshot_download
+
 # Download the model snapshot
 snapshot_download(repo_id="Gui28F/BeamDiffusion", local_dir="BeamDiffusionModel")
-from BeamDiffusionModel.beam_diffusion import BeamDiffusionPipeline, BeamDiffusionConfig,BeamDiffusionModel
+from BeamDiffusionModel.beam_diffusion import BeamDiffusionPipeline, BeamDiffusionConfig, BeamDiffusionModel
 
 # Initialize the configuration, model, and pipeline
-config = BeamDiffusionConfig()
+config = BeamDiffusionConfig(sd="SD-2.1", latents_idx=[0, 1, 2, 3], n_seeds=4, steps_back=2, beam_width=2, window_size=2, use_rand=True)
 model = BeamDiffusionModel(config)
 pipe = BeamDiffusionPipeline(model)
 
 # Define the input parameters
 input_data = {
     "steps": ["A lively outdoor celebration with guests gathered around, everyone excited to support the event.",
               "A chef in a cooking uniform raises one hand dramatically, signaling it's time to serve the food.",
               "Guests chat and laugh in a vibrant setting, with people gathered around tables, enjoying the event."],
-    "latents_idx": [0, 1, 2, 3],
-    "n_seeds": 4,
-    "steps_back": 2,
-    "beam_width": 2,
-    "window_size": 2,
-    "use_rand": True
 }
 
 # Generate the sequence of images
@@ -69,6 +67,8 @@ sequence_imgs = pipe(input_data)
 
 ## 🔍 Input Parameters Explained
 
+- **`sd`** (`str`): The base model to use for image generation. The available options are `SD-2.1` and `flux`.
+
 - **`steps`** (`list of strings`): Descriptions for each step in the image generation process. The model generates one image per step, forming a sequence that aligns with these descriptions.
 
 - **`latents_idx`** (`list of integers`): Indices referring to specific positions in the latent space to be used during image generation. This allows the model to leverage different latent representations for diverse outputs.
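After this commit the diffusion backend is chosen at configuration time via the new `sd` parameter. A minimal sketch of switching the Quickstart to the Flux backend (values mirror the README example; this assumes the snapshot has already been downloaded as shown above):

```python
from BeamDiffusionModel.beam_diffusion import BeamDiffusionPipeline, BeamDiffusionConfig, BeamDiffusionModel

# Same setup as the Quickstart, but selecting the Flux backend instead of
# the default SD-2.1; all beam-search knobs now live on the config object.
config = BeamDiffusionConfig(sd="flux", latents_idx=[0, 1, 2, 3], n_seeds=4,
                             steps_back=2, beam_width=2, window_size=2, use_rand=True)
model = BeamDiffusionModel(config)
pipe = BeamDiffusionPipeline(model)

# Only the step descriptions remain in the per-call input.
sequence_imgs = pipe({"steps": ["A chef plates the finished dish."]})
```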
beamInference.py CHANGED
@@ -10,7 +10,7 @@ def set_softmax(nodes, softmax, n_latents, n_max_latents):
     for node, softmax_value in zip(nodes, softmax):
         node.set_softmax(softmax_value, n_latents, n_max_latents)
 
-def beam_inference(steps, latents_idx, n_seeds=1, seeds=[], steps_back=2, beam_width=4, window_size=2, use_rand=True):
+def beam_inference(sd, steps, latents_idx, n_seeds=1, seeds=[], steps_back=2, beam_width=4, window_size=2, use_rand=True):
     while len(seeds) < n_seeds:
         seeds.append(random.randint(0, 10**6))
     captions = steps
@@ -19,7 +19,7 @@ def beam_inference(steps, latents_idx, n_seeds=1, seeds=[], steps_back=2, beam_
     for i, caption in enumerate(captions):
         if i == 0:
             for seed in seeds:
-                latents, img = gen_img(caption, seed=seed)
+                latents, img = gen_img(sd, caption, seed=seed)
                 new_node = tree.add_node(tree.root, caption, i + 1, "Rand Seed", "Rand Seed",
                                          img, latents, None)
                 nodes_to_explore.append(new_node)
@@ -30,7 +30,7 @@ def beam_inference(steps, latents_idx, n_seeds=1, seeds=[], steps_back=2, beam_
             current_step_embeddings, current_image_embeddings = [], []
             if use_rand:
                 seed = random.randint(0, 10 ** 6)
-                latents, img = gen_img(caption, seed=seed)
+                latents, img = gen_img(sd, caption, seed=seed)
                 new_node = tree.add_node(parent_node, caption, i + 1, "Rand Seed", "Rand Seed",
                                          img, latents, None)
                 parent_childs.append(new_node)
@@ -43,7 +43,7 @@ def beam_inference(steps, latents_idx, n_seeds=1, seeds=[], steps_back=2, beam_
             for ancestor_idx, ancestor in enumerate(ancestors):
                 for latent in latents_idx:
                     ancestor_latent = ancestor.get_latent(latent)
-                    latents, img = gen_img(caption, latent=ancestor_latent)
+                    latents, img = gen_img(sd, caption, latent=ancestor_latent)
                     new_node = tree.add_node(parent_node, caption, i + 1, ancestor.step, latent, img, latents, None)
                     parent_childs.append(new_node)
                     current_step_embedding, current_image_embedding = new_node.get_features()
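Every `gen_img` call site now passes the configured backend as the first positional argument. `gen_img` itself is not part of this commit, so the following is only a hypothetical sketch of the dispatch these call sites imply (the `generate` method name and its keyword arguments are assumptions, not the repo's actual API):

```python
# Hypothetical sketch: gen_img as implied by its new call sites above.
# `sd` is the backend built by BeamDiffusionConfig.get_model(), i.e. a
# StableDiffusion or Flux instance; generate() is an assumed interface.
def gen_img(sd, caption, seed=None, latent=None):
    if latent is not None:
        # Beam expansion: continue from an ancestor's stored latent.
        return sd.generate(caption, init_latent=latent)
    # First step / random branch: generate fresh from a seed.
    return sd.generate(caption, seed=seed)
```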
beam_diffusion.py CHANGED
@@ -2,11 +2,16 @@ from transformers import PretrainedConfig, PreTrainedModel, Pipeline
 import torch
 
 from BeamDiffusionModel.beamInference import beam_inference
+from BeamDiffusionModel.models.diffusionModel.StableDiffusion import StableDiffusion
+from BeamDiffusionModel.models.diffusionModel.Flux import Flux
 # Your custom configuration for the BeamDiffusion model
 class BeamDiffusionConfig(PretrainedConfig):
     model_type = "beam_diffusion"
-    def __init__(self, latents_idx=None, n_seeds=4, seeds=None, steps_back=2, beam_width=4, window_size=2, use_rand=True, **kwargs):
+    def __init__(self, sd="SD-2.1", latents_idx=None, n_seeds=4, seeds=None, steps_back=2, beam_width=4, window_size=2, use_rand=True, **kwargs):
         super().__init__(**kwargs)
+        self.sd_name = sd
+        self.sd = None
+        self.get_model(sd)
         self.latents_idx = latents_idx if latents_idx else [0, 1, 2, 3]
         self.n_seeds = n_seeds
         self.seeds = seeds if seeds else []
@@ -15,6 +20,12 @@ class BeamDiffusionConfig(PretrainedConfig):
         self.window_size = window_size
         self.use_rand = use_rand
 
+    def get_model(self, sd):
+        if self.sd_name == "flux":
+            self.sd = Flux()
+        elif self.sd_name == "SD-2.1":
+            self.sd = StableDiffusion()
+
 import torch.nn as nn
 from huggingface_hub import ModelHubMixin
 # Custom BeamDiffusionModel that performs inference for each step
@@ -29,14 +40,15 @@ class BeamDiffusionModel(PreTrainedModel, ModelHubMixin):
 
     def forward(self, input_data):
         images = beam_inference(
+            self.config.sd,
             steps=input_data.get('steps', []),
-            latents_idx=input_data.get("latents_idx", [0, 1, 2, 3]),
-            n_seeds=input_data.get("n_seeds", 4),
-            seeds=input_data.get("seeds", []),
-            steps_back=input_data.get("steps_back", 2),
-            beam_width=input_data.get("beam_width", 4),
-            window_size=input_data.get("window_size", 2),
-            use_rand=input_data.get("use_rand", True)
+            latents_idx=self.config.latents_idx,
+            n_seeds=self.config.n_seeds,
+            seeds=self.config.seeds,
+            steps_back=self.config.steps_back,
+            beam_width=self.config.beam_width,
+            window_size=self.config.window_size,
+            use_rand=self.config.use_rand,
         )
         return {"images": images}
 
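Behavioral note: `forward` now reads every beam-search setting from `self.config` rather than from `input_data`, so per-call keys such as `input_data["beam_width"]` are silently ignored after this commit. A minimal sketch of the resulting call pattern:

```python
# Beam-search settings are fixed when the config is built.
config = BeamDiffusionConfig(sd="SD-2.1", beam_width=2, window_size=2)
model = BeamDiffusionModel(config)

# Only "steps" is read from the request dict; a "beam_width" key here
# would no longer have any effect.
out = model({"steps": ["A lively outdoor celebration begins."]})
images = out["images"]
```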
requirements.txt CHANGED
@@ -1,75 +1,11 @@
---extra-index-url https://download.pytorch.org/whl/cu126
-accelerate==1.6.0
-aiohappyeyeballs==2.6.1
-aiohttp==3.11.16
-aiosignal==1.3.2
-annotated-types==0.7.0
-async-timeout==5.0.1
-attrs==25.3.0
-certifi==2025.1.31
-charset-normalizer==3.4.1
-click==8.1.8
-diffusers==0.32.2
-docker-pycreds==0.4.0
-eval_type_backport==0.2.2
-filelock==3.18.0
-frozenlist==1.5.0
-fsspec==2025.3.2
-gitdb==4.0.12
-GitPython==3.1.44
-huggingface-hub==0.30.1
-idna==3.10
-importlib_metadata==8.6.1
-Jinja2==3.1.3
-lightning==2.5.1
-lightning-utilities==0.14.3
-MarkupSafe==2.1.5
-mpmath==1.3.0
-multidict==6.3.2
-networkx==3.2.1
-numpy==2.0.2
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.21.5
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-packaging==24.2
-pillow==11.0.0
-platformdirs==4.3.7
-propcache==0.3.1
-protobuf==5.29.4
-psutil==7.0.0
-pydantic==2.11.2
-pydantic_core==2.33.1
-pytorch-lightning==2.5.1
+diffusers==0.33.1
+huggingface_hub==0.29.1
+lightning==2.4.0
+numpy==2.2.6
+Pillow==11.2.1
+pytorch_lightning==2.4.0
+PyYAML==6.0.1
 PyYAML==6.0.2
-regex==2024.11.6
-requests==2.32.3
-safetensors==0.5.3
-sentry-sdk==2.25.1
-setproctitle==1.3.5
-six==1.17.0
-smmap==5.0.2
-sympy==1.13.1
-tokenizers==0.21.1
-torch==2.6.0+cu126
-torchaudio==2.6.0+cu126
-torchmetrics==1.7.0
-torchvision==0.21.0+cu126
-tqdm==4.67.1
-transformers==4.51.0
-triton==3.2.0
-typing-inspection==0.4.0
-typing_extensions==4.13.1
-urllib3==2.3.0
-wandb==0.19.9
-yarl==1.19.0
-zipp==3.21.0
+torch==2.4.1
+transformers==4.49.0
+wandb==0.19.11