GheeButter committed
Commit 7fc0f19 · 1 Parent(s): 0fa4a31

Files changed (4):
  1. .gitattributes +3 -35
  2. .gitignore +3 -0
  3. .official_space.py +3 -0
  4. app.py +55 -18

.gitattributes CHANGED
@@ -1,35 +1,3 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+version https://git-lfs.github.com/spec/v1
+oid sha256:53d0f88d026949b750f6dc362dcaf73fbe8da034ed60233906b44caabcb834a4
+size 1559

.gitignore ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0304952213fb0fb8f566b4d082c631e9c5274ca73a060a5098441159f01d92ea
+size 138

.official_space.py ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51292da76658340750a198cba125f12668ff88f79248ffc920e168b645698b43
+size 24588

app.py CHANGED
@@ -2,26 +2,60 @@ import gradio as gr
 import numpy as np
 import random
 import os
+import re
 
 import spaces
-from diffusers import DiffusionPipeline
+from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
+from diffusers import ZImagePipeline
+from diffusers.models.transformers.transformer_z_image import ZImageTransformer2DModel
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from huggingface_hub import InferenceClient
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_repo_id = "Tongyi-MAI/Z-Image-Turbo"
 
-if torch.cuda.is_available():
-    torch_dtype = torch.float16
-else:
-    torch_dtype = torch.float32
-
-pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
-pipe = pipe.to(device)
-
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 
+# Load Z-Image model components
+print(f"Loading models from {model_repo_id}...")
+
+vae = AutoencoderKL.from_pretrained(
+    model_repo_id,
+    subfolder="vae",
+    torch_dtype=torch.bfloat16,
+    device_map="cuda",
+)
+
+text_encoder = AutoModelForCausalLM.from_pretrained(
+    model_repo_id,
+    subfolder="text_encoder",
+    torch_dtype=torch.bfloat16,
+    device_map="cuda",
+).eval()
+
+tokenizer = AutoTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer")
+tokenizer.padding_side = "left"
+
+pipe = ZImagePipeline(
+    scheduler=None,
+    vae=vae,
+    text_encoder=text_encoder,
+    tokenizer=tokenizer,
+    transformer=None,
+)
+
+transformer = ZImageTransformer2DModel.from_pretrained(
+    model_repo_id,
+    subfolder="transformer",
+).to("cuda", torch.bfloat16)
+
+pipe.transformer = transformer
+pipe.to("cuda", torch.bfloat16)
+
+print("Model loaded successfully!")
+
 # Initialize LLM for prompt enhancement
 llm_client = InferenceClient()
 
@@ -39,9 +73,8 @@ Keep the core concept but make it more descriptive. Return only the enhanced pro
 
     response = llm_client.chat_completion(
         messages=messages,
-        model="meta-llama/Llama-3.3-70B-Instruct",
-        max_tokens=200,
-        temperature=0.7,
+        model="Qwen/Qwen3-VL-30B-A3B-Instruct",
+        max_tokens=100,
    )
 
     enhanced = response.choices[0].message.content.strip()
@@ -72,16 +105,20 @@ def infer(
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
 
-    generator = torch.Generator().manual_seed(seed)
+    generator = torch.Generator("cuda").manual_seed(seed)
+
+    # Create scheduler with shift parameter
+    scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=3.0)
+    pipe.scheduler = scheduler
 
     image = pipe(
         prompt=prompt,
-        negative_prompt=negative_prompt,
+        height=height,
+        width=width,
         guidance_scale=guidance_scale,
         num_inference_steps=num_inference_steps,
-        width=width,
-        height=height,
         generator=generator,
+        max_sequence_length=512,
     ).images[0]
 
     return image, seed, prompt
@@ -171,7 +208,7 @@ with gr.Blocks(css=css) as demo:
                     minimum=0.0,
                     maximum=10.0,
                     step=0.1,
-                    value=0.0,  # Replace with defaults that work for your model
+                    value=0.0,
                 )
 
                 num_inference_steps = gr.Slider(
@@ -179,7 +216,7 @@ with gr.Blocks(css=css) as demo:
                     minimum=1,
                    maximum=50,
                     step=1,
-                    value=2,  # Replace with defaults that work for your model
+                    value=8,
                 )
 
        gr.Examples(examples=examples, inputs=[prompt])
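
For reference, the loading-and-inference path this commit wires into app.py can be exercised as a standalone script. The sketch below reuses the classes and arguments from the diff; the prompt, seed, image size, and output filename are illustrative, and the guidance_scale=0.0 / num_inference_steps=8 values are the slider defaults set above.

import torch
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, ZImagePipeline
from diffusers.models.transformers.transformer_z_image import ZImageTransformer2DModel
from transformers import AutoModelForCausalLM, AutoTokenizer

model_repo_id = "Tongyi-MAI/Z-Image-Turbo"

# Load the VAE, text encoder, and tokenizer exactly as the diff does.
vae = AutoencoderKL.from_pretrained(
    model_repo_id, subfolder="vae", torch_dtype=torch.bfloat16, device_map="cuda"
)
text_encoder = AutoModelForCausalLM.from_pretrained(
    model_repo_id, subfolder="text_encoder", torch_dtype=torch.bfloat16, device_map="cuda"
).eval()
tokenizer = AutoTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer")
tokenizer.padding_side = "left"

# Assemble the pipeline first, then attach the transformer separately,
# mirroring the two-step construction in the commit.
pipe = ZImagePipeline(
    scheduler=None, vae=vae, text_encoder=text_encoder,
    tokenizer=tokenizer, transformer=None,
)
pipe.transformer = ZImageTransformer2DModel.from_pretrained(
    model_repo_id, subfolder="transformer"
).to("cuda", torch.bfloat16)
pipe.to("cuda", torch.bfloat16)

# Per-call scheduler with shift=3.0 and a seeded CUDA generator, as in infer().
pipe.scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=3.0)
generator = torch.Generator("cuda").manual_seed(42)  # illustrative seed

image = pipe(
    prompt="a watercolor fox in a snowy forest",  # illustrative prompt
    height=1024,
    width=1024,
    guidance_scale=0.0,       # Turbo default from the slider above
    num_inference_steps=8,    # Turbo default from the slider above
    generator=generator,
    max_sequence_length=512,
).images[0]
image.save("z_image_turbo_sample.png")  # illustrative filename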
 
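The prompt-enhancement change is easier to see in isolation. Below is a minimal sketch of the call as this commit configures it, using huggingface_hub's InferenceClient; the system prompt shown is a stand-in, since the diff only reveals its tail ("Keep the core concept but make it more descriptive. Return only the enhanced pro…").

from huggingface_hub import InferenceClient

llm_client = InferenceClient()

def enhance_prompt(prompt: str) -> str:
    # The diff shows only the end of the real system prompt; this one is illustrative.
    messages = [
        {
            "role": "system",
            "content": "Rewrite the user's image prompt to be more descriptive. "
                       "Keep the core concept but make it more descriptive. "
                       "Return only the enhanced prompt.",
        },
        {"role": "user", "content": prompt},
    ]
    # The commit swaps the model to Qwen/Qwen3-VL-30B-A3B-Instruct, caps output at
    # 100 tokens, and drops the explicit temperature (the server default applies).
    response = llm_client.chat_completion(
        messages=messages,
        model="Qwen/Qwen3-VL-30B-A3B-Instruct",
        max_tokens=100,
    )
    return response.choices[0].message.content.strip()

# Usage: enhanced = enhance_prompt("a fox in snow")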