Ammar Vohra committed on
Commit
8ea3b3e
·
1 Parent(s): 9fe3668

initial commit with app files

Browse files
Files changed (2) hide show
  1. app.py +224 -0
  2. requirements.txt +14 -0
app.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import random
4
+ import spaces
5
+ import torch
6
+ import os
7
+ from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL
8
+ from diffusers.utils import load_image
9
+ from peft import PeftModel
10
+ from PIL import Image
11
+ from huggingface_hub import hf_hub_download
12
+
13
# Paths (update as needed)
base_model = "stabilityai/stable-diffusion-xl-base-1.0"
LORA_REPO_ID = "azad-uddin/blocky-character-uv"
LORA_FILENAME = "blocky-character.safetensors"  # Or whatever your LoRA file is named
# The ControlNet must belong to the same model family as the base pipeline.
# "lllyasviel/sd-controlnet-canny" targets Stable Diffusion 1.5 and cannot be
# driven by an SDXL pipeline, so an SDXL canny ControlNet is used instead.
controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"  # or your own
INPUT_IMAGE_PATH = "uv_outline.png"
prompt = "blockychar, futuristic knight, UV Texture"

# Use bfloat16 on GPU; fall back to float32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16 if device == "cuda" else torch.float32

# Access token for the LoRA repo (needed if it is private). The original
# variable name "hf_token" is honoured first; "HF_TOKEN" is accepted as a
# fallback. When neither is set this is None, as before.
hf_token = os.getenv("hf_token") or os.getenv("HF_TOKEN")

print(f"Loading model: {base_model} on device {device}...")
# Load ControlNet and pipeline
controlnet = ControlNetModel.from_pretrained(controlnet_model, torch_dtype=dtype)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    base_model,
    controlnet=controlnet,
    torch_dtype=dtype,
)

# Download the LoRA weights and attach them to the pipeline.
# `token=` replaces the deprecated `use_auth_token=` keyword.
lora_path = hf_hub_download(
    repo_id=LORA_REPO_ID,
    filename=LORA_FILENAME,
    token=hf_token,
)
pipe.load_lora_weights(os.path.dirname(lora_path), weight_name=LORA_FILENAME)
pipe.to(device)

# The UV outline used as the ControlNet conditioning image is stored in the
# same repo as the LoRA weights.
input_image_path = hf_hub_download(
    repo_id=LORA_REPO_ID,
    filename=INPUT_IMAGE_PATH,
    token=hf_token,
)
# Load UV outline image
control_image = Image.open(input_image_path).convert("RGB")

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024  # Or 512 if using SD 1.5 base
53
@spaces.GPU(duration=65)  # Adjust duration as needed
def generate_token(
    prompt: str,
    negative_prompt: str = "",
    lora_scale: float = 1.0,  # Control how much the LoRA influences the output
    seed: int = 42,
    randomize_seed: bool = False,
    width: int = 1024,
    height: int = 1024,
    guidance_scale: float = 7.0,
    num_inference_steps: int = 30,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image with the module-level pipeline and save it as a PNG.

    Args:
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative prompt passed to the pipeline.
        lora_scale: LoRA strength, forwarded via ``cross_attention_kwargs``.
        seed: Seed for the torch generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Output width in pixels.
        height: Output height in pixels.
        guidance_scale: Classifier-free guidance scale.
        num_inference_steps: Number of denoising steps.
        progress: Gradio progress tracker (mirrors tqdm progress in the UI).

    Returns:
        A ``(output_path, seed)`` tuple: the path of the saved PNG and the
        seed that was actually used.
    """
    # UI sliders may deliver numbers as floats; the generator and PIL's
    # resize both need plain integers.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    width = int(width)
    height = int(height)

    generator = torch.Generator(device=device).manual_seed(seed)

    # Generate the image
    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        # The SDXL ControlNet pipeline takes its conditioning image through
        # `image`; it has no `controlnet_conditioning_image` parameter.
        # Resize to the requested output size rather than a fixed 1024x1024.
        image=control_image.resize((width, height)),
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
        width=width,
        height=height,
        generator=generator,
        # This is how you apply LoRA scale during inference without fusing/unfusing
        cross_attention_kwargs={"scale": lora_scale},
    )
    image = result.images[0]

    # Save as PNG
    output_dir = "outputs"
    os.makedirs(output_dir, exist_ok=True)
    output_path = f"{output_dir}/generated_uvTexture_{seed}.png"
    image.save(output_path, "PNG")

    return output_path, seed
93
+
94
# Example prompts offered below the prompt box.
examples = [
    "blockychar, Ironman with damaged suit, UV Texture",
    "blockychar, Batman character with dark cape and cowl, UV Texture",
    "blockychar, Donald Duck in dress of Trump, UV Texture",
]

# Centre the main column and cap its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

# NOTE(review): the UI copy below talks about "crypto tokens" and a
# `tokenart style` trigger word, while the example prompts use `blockychar`
# and "UV Texture" -- confirm which wording is intended.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Custom Crypto Token Generator")
        gr.Markdown(
            "Generate unique crypto token images based on your text prompts. "
            "This Space uses a fine-tuned LoRA model to understand 'token' concepts."
            "\n\n**Instructions:** Describe your desired token, including its theme, materials, and style. "
            # Trailing space added: the adjacent literals were previously
            # joined as "`tokenart style`and descriptive ...".
            "For best results, include your LoRA's trigger word `tokenart style` "
            "and descriptive terms like `metallic`, `circular`, `glowing`."
        )

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="e.g., 'ironman tokenart, metallic, red and gold, arc reactor, futuristic, 3d render'",
                container=False,
            )
            run_button = gr.Button("Generate Token", scale=0, variant="primary")

        result_image = gr.Image(label="Generated Crypto Token", show_label=True, type="filepath")

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="e.g., blurry, low quality, text, watermark",
                value="blurry, low quality, text, watermark, deformed, bad anatomy",
            )
            lora_scale = gr.Slider(
                label="LoRA Scale",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.8,  # Recommended to start slightly below 1.0
                info="Controls how much the LoRA influences the generation. 0.0 for no LoRA, 1.0 for full effect.",
            )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=1024,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=1024,
                )
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale (CFG)",
                    minimum=1.0,
                    maximum=15.0,
                    step=0.5,
                    value=7.0,
                )
                num_inference_steps = gr.Slider(
                    label="Inference Steps",
                    minimum=10,
                    maximum=100,
                    step=5,
                    value=30,
                )

        # Only the prompt is supplied per example; generate_token falls back
        # to its own defaults for every other argument. The pinned
        # gradio 4.29.0 requests lazy caching via cache_examples="lazy" and
        # has no `cache_mode` keyword.
        gr.Examples(
            examples=examples,
            inputs=[prompt],
            outputs=[result_image, seed],
            fn=generate_token,
            cache_examples="lazy",
        )

    # Run generation on button click or when Enter is pressed in the prompt
    # box; the seed slider is updated with the seed that was actually used.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=generate_token,
        inputs=[
            prompt,
            negative_prompt,
            lora_scale,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result_image, seed],
    )

if __name__ == "__main__":
    demo.launch()
212
+ # Generate image
213
+ # result = pipe(
214
+ # prompt=prompt,
215
+ # negative_prompt="",
216
+ # num_inference_steps=28,
217
+ # guidance_scale=4,
218
+ # controlnet_conditioning_image=control_image,
219
+ # height=1024,
220
+ # width=1024
221
+ # ).images[0]
222
+
223
+ # result.save("generated_uv_texture.png")
224
+ # print("Saved: generated_uv_texture.png")
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/huggingface/diffusers.git
2
+ git+https://github.com/huggingface/peft.git
3
+ gradio==4.29.0 # Explicitly pin Gradio to a known stable version
4
+ accelerate
5
+ diffusers
6
+ torch
7
+ numpy
8
+ transformers
9
+ xformers
10
+ sentencepiece
11
+ scipy
12
+ pydantic==2.10.6 # Pinning pydantic to avoid potential conflicts with Gradio
13
+ Pillow
14
+ spaces