VcRlAgent committed on
Commit
e0b1280
·
1 Parent(s): 810f419

Rollback Working except Avatar

Browse files
Files changed (2) hide show
  1. app.py +10 -158
  2. app copy.py → app.py.wip_avatar +157 -11
app.py CHANGED
@@ -3,95 +3,16 @@
3
  # Author: Vijay S. Chaudhari | 2025
4
  # ==========================================
5
 
6
- import importlib.util
7
  import gradio as gr
8
  import spaces
9
  import torch
10
  import cv2
11
  import numpy as np
12
- from pathlib import Path
13
-
14
  from PIL import Image, ImageEnhance, ImageOps
15
  from rembg import remove
16
  from diffusers import StableDiffusionImg2ImgPipeline
17
- from diffusers import StableDiffusionXLPipeline
18
  import io
19
- import os, sys, subprocess, warnings, logging
20
-
21
- warnings.filterwarnings("ignore", category=UserWarning)
22
- logging.getLogger("onnxruntime").setLevel(logging.ERROR)
23
- os.environ["CUDA_VISIBLE_DEVICES"] = ""
24
-
25
- # --- Ensure InstantID is available ---
26
- if not Path("instantid").exists():
27
- print("πŸ”„ Cloning InstantID repository...")
28
- subprocess.run(["git", "clone", "--depth", "1", "https://github.com/InstantID/InstantID.git", "instantid"],check=True)
29
-
30
- repo_root = Path("instantid").resolve()
31
-
32
- # 🧭 Search for a pipeline file that matches *instantid*.py under the repo
33
- candidates = list(repo_root.rglob("pipeline*instantid*.py"))
34
- if not candidates:
35
- # Fallback common names across commits
36
- fallback_names = [
37
- "pipelines/pipeline_instantid.py",
38
- "pipelines/pipeline_stable_diffusion_instantid.py",
39
- "pipelines/pipeline_stable_diffusion_xl_instantid.py",
40
- ]
41
- for name in fallback_names:
42
- p = repo_root / name
43
- if p.exists():
44
- candidates = [p]
45
- break
46
-
47
- if not candidates:
48
- raise FileNotFoundError(
49
- "Could not locate an InstantID pipeline file under ./instantid. "
50
- "Repo layout may have changed. Please check the repo structure."
51
- )
52
-
53
- pipeline_file = candidates[0]
54
- print(f"βœ… Using InstantID pipeline file: {pipeline_file.relative_to(repo_root)}")
55
-
56
- # 🪄 Import the pipeline module by file path (no package needed)
57
- spec = importlib.util.spec_from_file_location("instantid_pipeline", str(pipeline_file))
58
- instantid_mod = importlib.util.module_from_spec(spec)
59
- spec.loader.exec_module(instantid_mod) # type: ignore
60
-
61
- # 🔎 Pick a pipeline class that looks like an InstantID Pipeline
62
- InstantIDPipeline = None
63
- for attr in dir(instantid_mod):
64
- if "InstantID" in attr and "Pipeline" in attr:
65
- InstantIDPipeline = getattr(instantid_mod, attr)
66
- break
67
-
68
- if InstantIDPipeline is None:
69
- # Helpful diagnostics
70
- print("Available names in module:", [a for a in dir(instantid_mod) if "Pipeline" in a])
71
- raise ImportError(
72
- "Could not find an InstantID pipeline class. "
73
- "Looked for a class name containing both 'InstantID' and 'Pipeline'."
74
- )
75
-
76
- print(f"βœ… Imported pipeline class: {InstantIDPipeline.__name__}")
77
-
78
- '''
79
- if os.path.exists("InstantID") and not os.path.exists("instantid"):
80
- os.rename("InstantID", "instantid")
81
 
82
- instantid_path = os.path.abspath("instantid")
83
- sys.path.append(instantid_path)
84
- sys.path.append(os.path.join(instantid_path, "pipelines"))
85
-
86
- #sys.path.append(os.path.abspath("instantid"))
87
- #sys.path.insert(0, os.path.join(os.getcwd(), 'InstantID'))
88
- try:
89
- from pipelines.pipeline_instantid import InstantIDPipeline
90
- print("βœ… InstantIDPipeline imported successfully.")
91
- except Exception as e:
92
- print("⚠️ Failed to import InstantIDPipeline:", e)
93
- InstantIDPipeline = None # graceful fallback
94
- '''
95
 
96
  import torchvision
97
  print("Printing Torch and TorchVision versions:")
@@ -217,10 +138,12 @@ def create_passport(img: Image.Image) -> Image.Image:
217
 
218
  @spaces.GPU
219
  def create_avatar(img: Image.Image, prompt: str, strength: float, guidance_scale: float) -> Image.Image:
220
- """
221
- Create a stylized AI avatar while preserving facial identity using InstantID.
222
- Retains core facial features, skin tone, and expressions of the input photo.
223
- """
 
 
224
 
225
  # Stylize with SD prompt. We are selecting these from UI now.
226
  #prompt = "highly detailed, digital portrait, professional lighting, cinematic style, artistic AI avatar"
@@ -228,84 +151,13 @@ def create_avatar(img: Image.Image, prompt: str, strength: float, guidance_scale
228
  #prompt = "studio portrait, even lighting, neutral background, realistic skin, confident pose"
229
  #prompt = "realistic professional headshot, soft studio lighting, neutral background, crisp details, natural skin tone"
230
 
231
- # --- Convert input ---
232
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
233
- img = img.convert("RGB").resize((512, 512), Image.Resampling.LANCZOS)
234
-
235
- # --- Step 1: Load InstantID + SDXL pipeline ---
236
- pipe = StableDiffusionXLPipeline.from_pretrained(
237
- "stabilityai/stable-diffusion-xl-base-1.0",
238
- torch_dtype=torch.float16
239
- ).to(device)
240
-
241
- instantid = InstantIDPipeline.from_pretrained("InstantID/InstantID", torch_dtype=torch.float16,)
242
- pipe.to("cuda" if torch.cuda.is_available() else "cpu")
243
- #pipe.load_ip_adapter(instantid)
244
-
245
- # --- Step 2: Optimize for ZeroGPU memory ---
246
- pipe.enable_attention_slicing()
247
- pipe.enable_model_cpu_offload()
248
-
249
- # --- Step 3: Prepare conditioning (face embedding) ---
250
- np_img = np.array(img)
251
- bgr_img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
252
- face_emb = instantid.extract_face_embedding(bgr_img) # key step: ID embedding guidance
253
-
254
- # --- Step 4: Stylized generation ---
255
- gen = pipe.generate_with_identity(
256
- image=img,
257
- face_embedding=face_emb,
258
- prompt=(
259
- prompt
260
- + ", portrait of the same person, consistent identity, detailed lighting, "
261
- "highly realistic skin texture, cinematic color tones"
262
- ),
263
- strength=float(strength),
264
- guidance_scale=float(guidance_scale),
265
- num_inference_steps=30
266
- )
267
-
268
- avatar = gen.images[0]
269
-
270
- # --- Step 5 (Optional): Post-process with GFPGAN for crispness ---
271
- try:
272
- from gfpgan import GFPGANer
273
- from realesrgan import RealESRGANer
274
- from basicsr.archs.rrdbnet_arch import RRDBNet
275
-
276
- model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64,
277
- num_block=23, num_grow_ch=32, scale=2)
278
- upsampler = RealESRGANer(
279
- scale=2,
280
- model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth',
281
- model=model,
282
- tile=400,
283
- tile_pad=10,
284
- pre_pad=0,
285
- half=True,
286
- device=device
287
- )
288
- face_enhancer = GFPGANer(
289
- model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
290
- upscale=1,
291
- arch='clean',
292
- channel_multiplier=2,
293
- bg_upsampler=upsampler,
294
- device=device
295
- )
296
-
297
- img_cv = cv2.cvtColor(np.array(avatar), cv2.COLOR_RGB2BGR)
298
- _, _, restored_img = face_enhancer.enhance(
299
- img_cv, has_aligned=False, only_center_face=False,
300
- paste_back=True, weight=0.4
301
- )
302
- avatar = Image.fromarray(cv2.cvtColor(restored_img, cv2.COLOR_BGR2RGB))
303
- except Exception as e:
304
- print(f"[WARN] GFPGAN post-process skipped: {e}")
305
 
 
 
306
  return avatar
307
 
308
-
309
  @spaces.GPU
310
  def process_all(img: Image.Image):
311
  """Process all three types at once"""
 
3
  # Author: Vijay S. Chaudhari | 2025
4
  # ==========================================
5
 
 
6
  import gradio as gr
7
  import spaces
8
  import torch
9
  import cv2
10
  import numpy as np
 
 
11
  from PIL import Image, ImageEnhance, ImageOps
12
  from rembg import remove
13
  from diffusers import StableDiffusionImg2ImgPipeline
 
14
  import io
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  import torchvision
18
  print("Printing Torch and TorchVision versions:")
 
138
 
139
  @spaces.GPU
140
  def create_avatar(img: Image.Image, prompt: str, strength: float, guidance_scale: float) -> Image.Image:
141
+ """Stylized AI avatar using Stable Diffusion Img2Img with user inputs"""
142
+ # Enhance face
143
+ img_enhanced = enhance_face(img)
144
+
145
+ # Resize for SD (512x512)
146
+ img_resized = img_enhanced.convert("RGB").resize((512, 512))
147
 
148
  # Stylize with SD prompt. We are selecting these from UI now.
149
  #prompt = "highly detailed, digital portrait, professional lighting, cinematic style, artistic AI avatar"
 
151
  #prompt = "studio portrait, even lighting, neutral background, realistic skin, confident pose"
152
  #prompt = "realistic professional headshot, soft studio lighting, neutral background, crisp details, natural skin tone"
153
 
154
+ with torch.autocast("cuda"):
155
+ result = sd_pipe(prompt=prompt, image=img_resized, strength=strength, guidance_scale=guidance_scale)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
+ avatar = enhance_face(result.images[0])
158
+
159
  return avatar
160
 
 
161
  @spaces.GPU
162
  def process_all(img: Image.Image):
163
  """Process all three types at once"""
app copy.py → app.py.wip_avatar RENAMED
@@ -3,18 +3,95 @@
3
  # Author: Vijay S. Chaudhari | 2025
4
  # ==========================================
5
 
 
6
  import gradio as gr
7
  import spaces
8
  import torch
9
  import cv2
10
  import numpy as np
 
 
11
  from PIL import Image, ImageEnhance, ImageOps
12
  from rembg import remove
13
  from diffusers import StableDiffusionImg2ImgPipeline
14
  from diffusers import StableDiffusionXLPipeline
15
- from instantid import InstantID
16
  import io
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  import torchvision
20
  print("Printing Torch and TorchVision versions:")
@@ -140,12 +217,10 @@ def create_passport(img: Image.Image) -> Image.Image:
140
 
141
  @spaces.GPU
142
  def create_avatar(img: Image.Image, prompt: str, strength: float, guidance_scale: float) -> Image.Image:
143
- """Stylized AI avatar using Stable Diffusion Img2Img with user inputs"""
144
- # Enhance face
145
- img_enhanced = enhance_face(img)
146
-
147
- # Resize for SD (512x512)
148
- img_resized = img_enhanced.convert("RGB").resize((512, 512))
149
 
150
  # Stylize with SD prompt. We are selecting these from UI now.
151
  #prompt = "highly detailed, digital portrait, professional lighting, cinematic style, artistic AI avatar"
@@ -153,13 +228,84 @@ def create_avatar(img: Image.Image, prompt: str, strength: float, guidance_scale
153
  #prompt = "studio portrait, even lighting, neutral background, realistic skin, confident pose"
154
  #prompt = "realistic professional headshot, soft studio lighting, neutral background, crisp details, natural skin tone"
155
 
156
- with torch.autocast("cuda"):
157
- result = sd_pipe(prompt=prompt, image=img_resized, strength=strength, guidance_scale=guidance_scale)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
- avatar = enhance_face(result.images[0])
160
-
161
  return avatar
162
 
 
163
  @spaces.GPU
164
  def process_all(img: Image.Image):
165
  """Process all three types at once"""
 
3
  # Author: Vijay S. Chaudhari | 2025
4
  # ==========================================
5
 
6
+ import importlib.util
7
  import gradio as gr
8
  import spaces
9
  import torch
10
  import cv2
11
  import numpy as np
12
+ from pathlib import Path
13
+
14
  from PIL import Image, ImageEnhance, ImageOps
15
  from rembg import remove
16
  from diffusers import StableDiffusionImg2ImgPipeline
17
  from diffusers import StableDiffusionXLPipeline
 
18
  import io
19
+ import os, sys, subprocess, warnings, logging
20
+
21
+ warnings.filterwarnings("ignore", category=UserWarning)
22
+ logging.getLogger("onnxruntime").setLevel(logging.ERROR)
23
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
24
+
25
+ # --- Ensure InstantID is available ---
26
+ if not Path("instantid").exists():
27
+ print("πŸ”„ Cloning InstantID repository...")
28
+ subprocess.run(["git", "clone", "--depth", "1", "https://github.com/InstantID/InstantID.git", "instantid"],check=True)
29
+
30
+ repo_root = Path("instantid").resolve()
31
+
32
+ # 🧭 Search for a pipeline file that matches *instantid*.py under the repo
33
+ candidates = list(repo_root.rglob("pipeline*instantid*.py"))
34
+ if not candidates:
35
+ # Fallback common names across commits
36
+ fallback_names = [
37
+ "pipelines/pipeline_instantid.py",
38
+ "pipelines/pipeline_stable_diffusion_instantid.py",
39
+ "pipelines/pipeline_stable_diffusion_xl_instantid.py",
40
+ ]
41
+ for name in fallback_names:
42
+ p = repo_root / name
43
+ if p.exists():
44
+ candidates = [p]
45
+ break
46
+
47
+ if not candidates:
48
+ raise FileNotFoundError(
49
+ "Could not locate an InstantID pipeline file under ./instantid. "
50
+ "Repo layout may have changed. Please check the repo structure."
51
+ )
52
+
53
+ pipeline_file = candidates[0]
54
+ print(f"βœ… Using InstantID pipeline file: {pipeline_file.relative_to(repo_root)}")
55
+
56
+ # 🪄 Import the pipeline module by file path (no package needed)
57
+ spec = importlib.util.spec_from_file_location("instantid_pipeline", str(pipeline_file))
58
+ instantid_mod = importlib.util.module_from_spec(spec)
59
+ spec.loader.exec_module(instantid_mod) # type: ignore
60
+
61
+ # 🔎 Pick a pipeline class that looks like an InstantID Pipeline
62
+ InstantIDPipeline = None
63
+ for attr in dir(instantid_mod):
64
+ if "InstantID" in attr and "Pipeline" in attr:
65
+ InstantIDPipeline = getattr(instantid_mod, attr)
66
+ break
67
+
68
+ if InstantIDPipeline is None:
69
+ # Helpful diagnostics
70
+ print("Available names in module:", [a for a in dir(instantid_mod) if "Pipeline" in a])
71
+ raise ImportError(
72
+ "Could not find an InstantID pipeline class. "
73
+ "Looked for a class name containing both 'InstantID' and 'Pipeline'."
74
+ )
75
+
76
+ print(f"βœ… Imported pipeline class: {InstantIDPipeline.__name__}")
77
+
78
+ '''
79
+ if os.path.exists("InstantID") and not os.path.exists("instantid"):
80
+ os.rename("InstantID", "instantid")
81
 
82
+ instantid_path = os.path.abspath("instantid")
83
+ sys.path.append(instantid_path)
84
+ sys.path.append(os.path.join(instantid_path, "pipelines"))
85
+
86
+ #sys.path.append(os.path.abspath("instantid"))
87
+ #sys.path.insert(0, os.path.join(os.getcwd(), 'InstantID'))
88
+ try:
89
+ from pipelines.pipeline_instantid import InstantIDPipeline
90
+ print("βœ… InstantIDPipeline imported successfully.")
91
+ except Exception as e:
92
+ print("⚠️ Failed to import InstantIDPipeline:", e)
93
+ InstantIDPipeline = None # graceful fallback
94
+ '''
95
 
96
  import torchvision
97
  print("Printing Torch and TorchVision versions:")
 
217
 
218
  @spaces.GPU
219
  def create_avatar(img: Image.Image, prompt: str, strength: float, guidance_scale: float) -> Image.Image:
220
+ """
221
+ Create a stylized AI avatar while preserving facial identity using InstantID.
222
+ Retains core facial features, skin tone, and expressions of the input photo.
223
+ """
 
 
224
 
225
  # Stylize with SD prompt. We are selecting these from UI now.
226
  #prompt = "highly detailed, digital portrait, professional lighting, cinematic style, artistic AI avatar"
 
228
  #prompt = "studio portrait, even lighting, neutral background, realistic skin, confident pose"
229
  #prompt = "realistic professional headshot, soft studio lighting, neutral background, crisp details, natural skin tone"
230
 
231
+ # --- Convert input ---
232
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
233
+ img = img.convert("RGB").resize((512, 512), Image.Resampling.LANCZOS)
234
+
235
+ # --- Step 1: Load InstantID + SDXL pipeline ---
236
+ pipe = StableDiffusionXLPipeline.from_pretrained(
237
+ "stabilityai/stable-diffusion-xl-base-1.0",
238
+ torch_dtype=torch.float16
239
+ ).to(device)
240
+
241
+ instantid = InstantIDPipeline.from_pretrained("InstantID/InstantID", torch_dtype=torch.float16,)
242
+ pipe.to("cuda" if torch.cuda.is_available() else "cpu")
243
+ #pipe.load_ip_adapter(instantid)
244
+
245
+ # --- Step 2: Optimize for ZeroGPU memory ---
246
+ pipe.enable_attention_slicing()
247
+ pipe.enable_model_cpu_offload()
248
+
249
+ # --- Step 3: Prepare conditioning (face embedding) ---
250
+ np_img = np.array(img)
251
+ bgr_img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
252
+ face_emb = instantid.extract_face_embedding(bgr_img) # key step: ID embedding guidance
253
+
254
+ # --- Step 4: Stylized generation ---
255
+ gen = pipe.generate_with_identity(
256
+ image=img,
257
+ face_embedding=face_emb,
258
+ prompt=(
259
+ prompt
260
+ + ", portrait of the same person, consistent identity, detailed lighting, "
261
+ "highly realistic skin texture, cinematic color tones"
262
+ ),
263
+ strength=float(strength),
264
+ guidance_scale=float(guidance_scale),
265
+ num_inference_steps=30
266
+ )
267
+
268
+ avatar = gen.images[0]
269
+
270
+ # --- Step 5 (Optional): Post-process with GFPGAN for crispness ---
271
+ try:
272
+ from gfpgan import GFPGANer
273
+ from realesrgan import RealESRGANer
274
+ from basicsr.archs.rrdbnet_arch import RRDBNet
275
+
276
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64,
277
+ num_block=23, num_grow_ch=32, scale=2)
278
+ upsampler = RealESRGANer(
279
+ scale=2,
280
+ model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth',
281
+ model=model,
282
+ tile=400,
283
+ tile_pad=10,
284
+ pre_pad=0,
285
+ half=True,
286
+ device=device
287
+ )
288
+ face_enhancer = GFPGANer(
289
+ model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
290
+ upscale=1,
291
+ arch='clean',
292
+ channel_multiplier=2,
293
+ bg_upsampler=upsampler,
294
+ device=device
295
+ )
296
+
297
+ img_cv = cv2.cvtColor(np.array(avatar), cv2.COLOR_RGB2BGR)
298
+ _, _, restored_img = face_enhancer.enhance(
299
+ img_cv, has_aligned=False, only_center_face=False,
300
+ paste_back=True, weight=0.4
301
+ )
302
+ avatar = Image.fromarray(cv2.cvtColor(restored_img, cv2.COLOR_BGR2RGB))
303
+ except Exception as e:
304
+ print(f"[WARN] GFPGAN post-process skipped: {e}")
305
 
 
 
306
  return avatar
307
 
308
+
309
  @spaces.GPU
310
  def process_all(img: Image.Image):
311
  """Process all three types at once"""