Upload 5 files

- README.md (+10, -10)
- beamInference.py (+4, -4)
- beam_diffusion.py (+20, -8)
- requirements.txt (+10, -74)
README.md (CHANGED)

````diff
@@ -27,8 +27,11 @@ python3 -m venv beam_env
 source beam_env/bin/activate  # On macOS/Linux
 # beam_env\Scripts\activate   # On Windows
 
+pip install huggingface_hub
+huggingface-cli download Gui28F/BeamDiffusion2 --include requirements.txt --local-dir .
+
 # 3. Install required dependencies
-pip install -r ./
+pip install -r ./requirements.txt
 ```
 ---
 ## 🚀 Quickstart Guide
@@ -37,26 +40,21 @@ Here's a basic example of how to use BeamDiffusion with the `transformers` libra
 
 ```python
 from huggingface_hub import snapshot_download
+
 # Download the model snapshot
 snapshot_download(repo_id="Gui28F/BeamDiffusion", local_dir="BeamDiffusionModel")
-from BeamDiffusionModel.beam_diffusion import BeamDiffusionPipeline, BeamDiffusionConfig,BeamDiffusionModel
+from BeamDiffusionModel.beam_diffusion import BeamDiffusionPipeline, BeamDiffusionConfig, BeamDiffusionModel
 
 # Initialize the configuration, model, and pipeline
-config = BeamDiffusionConfig()
+config = BeamDiffusionConfig(sd="SD-2.1", latents_idx=[0, 1, 2, 3], n_seeds=4, steps_back=2, beam_width=2, window_size=2, use_rand=True)
 model = BeamDiffusionModel(config)
 pipe = BeamDiffusionPipeline(model)
-
+
 # Define the input parameters
 input_data = {
     "steps": ["A lively outdoor celebration with guests gathered around, everyone excited to support the event.",
               "A chef in a cooking uniform raises one hand dramatically, signaling it's time to serve the food.",
               "Guests chat and laugh in a vibrant setting, with people gathered around tables, enjoying the event."],
-    "latents_idx": [0, 1, 2, 3],
-    "n_seeds": 4,
-    "steps_back": 2,
-    "beam_width": 2,
-    "window_size": 2,
-    "use_rand": True
 }
 
 # Generate the sequence of images
@@ -69,6 +67,8 @@ sequence_imgs = pipe(input_data)
 
 ## 🔍 Input Parameters Explained
 
+- **`sd`** (`str`): The base model to use for image generation. The available options are `SD-2.1` and `flux`.
+
 - **`steps`** (`list of strings`): Descriptions for each step in the image generation process. The model generates one image per step, forming a sequence that aligns with these descriptions.
 
 - **`latents_idx`** (`list of integers`): Indices referring to specific positions in the latent space to be used during image generation. This allows the model to leverage different latent representations for diverse outputs.
````
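For reference, the setup commands as they read after this change, taken from the new README lines (the `python3 -m venv beam_env` line comes from the hunk header context):

```bash
# Create and activate a virtual environment
python3 -m venv beam_env
source beam_env/bin/activate  # On macOS/Linux
# beam_env\Scripts\activate   # On Windows

# Fetch the pinned requirements from the Hub
pip install huggingface_hub
huggingface-cli download Gui28F/BeamDiffusion2 --include requirements.txt --local-dir .

# 3. Install required dependencies
pip install -r ./requirements.txt
```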
beamInference.py (CHANGED)

````diff
@@ -10,7 +10,7 @@ def set_softmax(nodes, softmax, n_latents, n_max_latents):
     for node, softmax_value in zip(nodes, softmax):
         node.set_softmax(softmax_value, n_latents, n_max_latents)
 
-def beam_inference(steps, latents_idx, n_seeds= 1, seeds=[], steps_back=2, beam_width=4, window_size=2, use_rand=True):
+def beam_inference(sd, steps, latents_idx, n_seeds= 1, seeds=[], steps_back=2, beam_width=4, window_size=2, use_rand=True):
     while len(seeds) < n_seeds:
         seeds.append(random.randint(0, 10**6))
     captions = steps
@@ -19,7 +19,7 @@ def beam_inference(steps, latents_idx, n_seeds= 1, seeds=[], steps_back=2, beam_
     for i, caption in enumerate(captions):
         if i == 0:
             for seed in seeds:
-                latents, img = gen_img(caption, seed=seed)
+                latents, img = gen_img(sd,caption, seed=seed)
                 new_node = tree.add_node(tree.root, caption, i + 1, "Rand Seed", "Rand Seed",
                                          img, latents, None)
                 nodes_to_explore.append(new_node)
@@ -30,7 +30,7 @@ def beam_inference(steps, latents_idx, n_seeds= 1, seeds=[], steps_back=2, beam_
             current_step_embeddings, current_image_embeddings = [], []
             if use_rand:
                 seed = random.randint(0, 10 ** 6)
-                latents, img = gen_img(caption, seed=seed)
+                latents, img = gen_img(sd,caption, seed=seed)
                 new_node = tree.add_node(parent_node, caption, i + 1, "Rand Seed", "Rand Seed",
                                          img, latents, None)
                 parent_childs.append(new_node)
@@ -43,7 +43,7 @@ def beam_inference(steps, latents_idx, n_seeds= 1, seeds=[], steps_back=2, beam_
             for ancestor_idx, ancestor in enumerate(ancestors):
                 for latent in latents_idx:
                     ancestor_latent = ancestor.get_latent(latent)
-                    latents, img = gen_img(caption, latent=ancestor_latent)
+                    latents, img = gen_img(sd,caption, latent=ancestor_latent)
                     new_node = tree.add_node(parent_node, caption, i + 1, ancestor.step, latent,img, latents, None)
                     parent_childs.append(new_node)
                     current_step_embedding, current_image_embedding = new_node.get_features()
````
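The net effect of this change is that `beam_inference` no longer assumes a single baked-in diffusion backend: the caller passes one in as a new first positional argument, and it is threaded through every `gen_img` call. A minimal usage sketch, assuming the backend classes construct without arguments as `beam_diffusion.py` does below:

```python
# Sketch only: shows the updated beam_inference call pattern.
# Assumes StableDiffusion() can be built standalone, as in beam_diffusion.py.
from BeamDiffusionModel.models.diffusionModel.StableDiffusion import StableDiffusion
from BeamDiffusionModel.beamInference import beam_inference

backend = StableDiffusion()  # object forwarded to every gen_img(sd, ...) call
images = beam_inference(
    backend,                 # new first positional argument
    steps=["A lively outdoor celebration with guests gathered around."],
    latents_idx=[0, 1, 2, 3],
    n_seeds=4,
)
```

Note also the pre-existing mutable default `seeds=[]`: seeds appended during one call persist into subsequent calls that rely on the default.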
beam_diffusion.py (CHANGED)

````diff
@@ -2,11 +2,16 @@ from transformers import PretrainedConfig, PreTrainedModel, Pipeline
 import torch
 
 from BeamDiffusionModel.beamInference import beam_inference
+from BeamDiffusionModel.models.diffusionModel.StableDiffusion import StableDiffusion
+from BeamDiffusionModel.models.diffusionModel.Flux import Flux
 # Your custom configuration for the BeamDiffusion model
 class BeamDiffusionConfig(PretrainedConfig):
     model_type = "beam_diffusion"
-    def __init__(self, latents_idx=None, n_seeds=4, seeds=None, steps_back=2, beam_width=4, window_size=2, use_rand=True, **kwargs):
+    def __init__(self, sd="SD-2.1",latents_idx=None, n_seeds=4, seeds=None, steps_back=2, beam_width=4, window_size=2, use_rand=True, **kwargs):
         super().__init__(**kwargs)
+        self.sd_name = sd
+        self.sd = None
+        self.get_model(sd)
         self.latents_idx = latents_idx if latents_idx else [0, 1, 2, 3]
         self.n_seeds = n_seeds
         self.seeds = seeds if seeds else []
@@ -15,6 +20,12 @@ class BeamDiffusionConfig(PretrainedConfig):
         self.window_size = window_size
         self.use_rand = use_rand
 
+    def get_model(self, sd):
+        if self.sd_name == "flux":
+            self.sd = Flux()
+        elif self.sd_name == "SD-2.1":
+            self.sd = StableDiffusion()
+
 import torch.nn as nn
 from huggingface_hub import ModelHubMixin
 # Custom BeamDiffusionModel that performs inference for each step
@@ -29,14 +40,15 @@ class BeamDiffusionModel(PreTrainedModel, ModelHubMixin):
 
     def forward(self, input_data):
         images = beam_inference(
+            self.config.sd,
             steps=input_data.get('steps', []),
-            latents_idx=
-            n_seeds=
-            seeds=
-            steps_back=
-            beam_width=
-            window_size=
-            use_rand=
+            latents_idx=self.config.latents_idx,
+            n_seeds=self.config.n_seeds,
+            seeds=self.config.seeds,
+            steps_back=self.config.steps_back,
+            beam_width=self.config.beam_width,
+            window_size=self.config.window_size,
+            use_rand=self.config.use_rand,
         )
         return {"images": images}
 
````
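With this change the backend is selected once, in the config, and `forward` simply passes `self.config.sd` through to `beam_inference`. A minimal selection sketch using only names from the diff above:

```python
# Sketch of backend selection via the new `sd` argument.
from BeamDiffusionModel.beam_diffusion import (
    BeamDiffusionPipeline,
    BeamDiffusionConfig,
    BeamDiffusionModel,
)

# "SD-2.1" (the default) selects StableDiffusion(); "flux" selects Flux()
config = BeamDiffusionConfig(sd="flux")
model = BeamDiffusionModel(config)
pipe = BeamDiffusionPipeline(model)
```

Note that `get_model` has no `else` branch: any other `sd` value leaves `self.sd` as `None`, so a typo in the backend name only surfaces later, inside `beam_inference`.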
requirements.txt (CHANGED)

````diff
@@ -1,75 +1,11 @@
-
-
-
-
-
-
-
-attrs==25.3.0
-certifi==2025.1.31
-charset-normalizer==3.4.1
-click==8.1.8
-diffusers==0.32.2
-docker-pycreds==0.4.0
-eval_type_backport==0.2.2
-filelock==3.18.0
-frozenlist==1.5.0
-fsspec==2025.3.2
-gitdb==4.0.12
-GitPython==3.1.44
-huggingface-hub==0.30.1
-idna==3.10
-importlib_metadata==8.6.1
-Jinja2==3.1.3
-lightning==2.5.1
-lightning-utilities==0.14.3
-MarkupSafe==2.1.5
-mpmath==1.3.0
-multidict==6.3.2
-networkx==3.2.1
-numpy==2.0.2
-nvidia-cublas-cu12==12.6.4.1
-nvidia-cuda-cupti-cu12==12.6.80
-nvidia-cuda-nvrtc-cu12==12.6.77
-nvidia-cuda-runtime-cu12==12.6.77
-nvidia-cudnn-cu12==9.5.1.17
-nvidia-cufft-cu12==11.3.0.4
-nvidia-curand-cu12==10.3.7.77
-nvidia-cusolver-cu12==11.7.1.2
-nvidia-cusparse-cu12==12.5.4.2
-nvidia-cusparselt-cu12==0.6.3
-nvidia-nccl-cu12==2.21.5
-nvidia-nvjitlink-cu12==12.6.85
-nvidia-nvtx-cu12==12.6.77
-packaging==24.2
-pillow==11.0.0
-platformdirs==4.3.7
-propcache==0.3.1
-protobuf==5.29.4
-psutil==7.0.0
-pydantic==2.11.2
-pydantic_core==2.33.1
-pytorch-lightning==2.5.1
+diffusers==0.33.1
+huggingface_hub==0.29.1
+lightning==2.4.0
+numpy==2.2.6
+Pillow==11.2.1
+pytorch_lightning==2.4.0
+PyYAML==6.0.1
 PyYAML==6.0.2
-
-
-
-sentry-sdk==2.25.1
-setproctitle==1.3.5
-six==1.17.0
-smmap==5.0.2
-sympy==1.13.1
-tokenizers==0.21.1
-torch==2.6.0+cu126
-torchaudio==2.6.0+cu126
-torchmetrics==1.7.0
-torchvision==0.21.0+cu126
-tqdm==4.67.1
-transformers==4.51.0
-triton==3.2.0
-typing-inspection==0.4.0
-typing_extensions==4.13.1
-urllib3==2.3.0
-wandb==0.19.9
-yarl==1.19.0
-zipp==3.21.0
+torch==2.4.1
+transformers==4.49.0
+wandb==0.19.11
````
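One thing worth flagging in the slimmed-down file: it now pins PyYAML twice, the added `PyYAML==6.0.1` alongside the pre-existing `PyYAML==6.0.2`. pip normally rejects duplicate exact pins of the same package with a "Double requirement given" error, so the README's updated install flow, which consumes this file, would likely fail until one of the two pins is removed:

```bash
# Install flow from the updated README (see the README.md diff above).
pip install huggingface_hub
huggingface-cli download Gui28F/BeamDiffusion2 --include requirements.txt --local-dir .
pip install -r ./requirements.txt  # expected to error while both PyYAML pins are present
```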