42Cummer committed on
Commit
6346edb
·
verified ·
1 Parent(s): 4f2cd9e

Upload 3 files

Browse files
Files changed (3) hide show
  1. gradioapp.py +42 -0
  2. pipeline.py +132 -0
  3. pipeline.sh +58 -0
gradioapp.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+ import re
4
+
5
def imagegen_pipeline(image_path, optional_tags):
    """Run ``pipeline.sh`` on an uploaded image and return the generated avatar.

    Args:
        image_path: Filesystem path of the input image. Gradio passes ``None``
            when nothing has been uploaded.
        optional_tags: Free-form, comma-separated prompt tags; may be empty
            or ``None``.

    Returns:
        A ``(output_path, logs)`` tuple. ``output_path`` is ``None`` whenever
        the pipeline could not be started, exited with a non-zero status, or
        did not report an existing output file; ``logs`` always carries the
        collected diagnostics as a single string.
    """
    # Local import: the module header only imports gradio, subprocess and re.
    import os

    if not image_path:
        # Without this guard a missing upload puts None into the argv list
        # and subprocess.run raises TypeError.
        return None, "[App] No input image provided."

    logs = []

    # Prefer the pipeline.sh that sits next to this file so the app works from
    # any working directory; fall back to the original cwd-relative lookup.
    script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pipeline.sh")
    if not os.path.isfile(script):
        script = os.path.abspath("pipeline.sh")
    script_dir = os.path.dirname(script)

    # Run unified pipeline. The image path is made absolute because the
    # script changes into its own directory before invoking pipeline.py.
    cmd = ["bash", script, os.path.abspath(image_path)]
    if optional_tags and optional_tags.strip():
        cmd += ["-t", optional_tags]

    try:
        proc = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as exc:
        # e.g. bash is not installed / not on PATH
        return None, f"[App] Could not start pipeline: {exc}"

    logs.append("[Pipeline stdout]\n" + (proc.stdout or "").strip())
    if proc.stderr:
        logs.append("[Pipeline stderr]\n" + proc.stderr.strip())

    if proc.returncode != 0:
        return None, "\n\n".join(logs)

    # Look for the output image path in stdout
    saved_match = re.search(r"^Image saved as\s*(.+)$", proc.stdout or "", re.MULTILINE)
    if not saved_match:
        logs.append("[App] Could not find 'Image saved as ...' in pipeline output.")
        return None, "\n\n".join(logs)

    # The pipeline reports a name relative to the script directory (that is
    # where it runs), so resolve it there; an absolute name is kept as-is.
    output_path = os.path.join(script_dir, saved_match.group(1).strip())
    if not os.path.isfile(output_path):
        logs.append(f"[App] Reported output image does not exist: {output_path}")
        return None, "\n\n".join(logs)

    logs.append(f"[App] Output image: {output_path}")
    return output_path, "\n\n".join(logs)
31
+
32
# UI wiring: an uploaded image plus a free-text tag field go in; the generated
# image and the textual run log come out.
input_components = [
    gr.Image(label="Input Image", type="filepath"),
    gr.Textbox(label="Optional Tags", value=""),
]
output_components = [
    gr.Image(label="Output Image", type="filepath"),
    gr.Textbox(label="Logs"),
]

demo = gr.Interface(
    fn=imagegen_pipeline,
    inputs=input_components,
    outputs=output_components,
    title="GenshinfyV2 !!",
    description="Generate an avatar-style image of your face from a Genshin Impact character reference.",
    theme="default",
)

if __name__ == "__main__":
    # Only start the server when executed as a script, not when imported.
    demo.launch(pwa=True)
pipeline.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ A more accurate Human to Anime Feature Matcher
4
+ """
5
+
6
+ import torch
7
+ import timm
8
+ from PIL import Image
9
+ import numpy as np
10
+ from torchvision import transforms
11
+ import os
12
+ import clip
13
+ import glob
14
+ import sys
15
+ import argparse
16
+ from diffusers import AutoencoderKL, StableDiffusionPipeline
17
+ import gc
18
+
19
def _select_device():
    """Return the preferred torch device name: "cuda", then "mps", then "cpu"."""
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"


def _unit_vector(vec):
    """Return ``vec`` as a float32 array scaled to unit L2 norm.

    The input is never modified in place. An all-zero vector is returned
    unscaled instead of producing NaNs.
    """
    arr = np.asarray(vec, dtype=np.float32)
    norm = np.linalg.norm(arr)
    return arr / norm if norm > 0 else arr


def _find_best_match(test_path, avatar_files, device, alpha=0.67):
    """Return the entry of ``avatar_files`` most similar to ``test_path``.

    Similarity is ``alpha * cos(CLIP) + (1 - alpha) * cos(DINOv2)``. The two
    feature extractors are loaded inside this function so that they become
    garbage as soon as it returns.

    Args:
        test_path: Path of the query image.
        avatar_files: Non-empty list of candidate reference image paths.
        device: Torch device name to run the models on.
        alpha: Weight of the CLIP similarity in the blend.
    """
    model = timm.create_model("vit_base_patch14_dinov2.lvd142m", pretrained=True)
    model = model.eval().to(device)
    clip_model, preprocess_clip = clip.load("ViT-B/32", device=device)

    transform = transforms.Compose([
        transforms.Resize((518, 518)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ])

    def embed(img_path):
        """Return (dino, clip) unit-length feature vectors for one image."""
        # Open each file once and share the decoded image between both models.
        with Image.open(img_path) as handle:
            img = handle.convert("RGB")
        dino_in = transform(img).unsqueeze(0).to(device)
        clip_in = preprocess_clip(img).unsqueeze(0).to(device)
        with torch.no_grad():
            dino_out = model.forward_features(dino_in)  # feature extraction
            clip_out = clip_model.encode_image(clip_in)
        return (
            _unit_vector(dino_out.cpu().numpy().flatten()),
            _unit_vector(clip_out.cpu().numpy().flatten()),
        )

    query_dino, query_clip = embed(test_path)

    similarities = []
    for avatar in avatar_files:
        avatar_dino, avatar_clip = embed(avatar)
        sim_dino = float(np.dot(query_dino, avatar_dino))
        sim_clip = float(np.dot(query_clip, avatar_clip))
        similarities.append(alpha * sim_clip + (1 - alpha) * sim_dino)

    return avatar_files[int(np.argmax(similarities))]


def _generate_avatar(pipe, file_path, optional_tags=None):
    """Generate one image for the matched character and save it as a PNG.

    The character name is the lower-cased base name of ``file_path``. The
    result is written to ``Avatar_like_<character>.png`` in the current
    directory and announced on stdout as ``Image saved as <name>``.

    Returns:
        The file name the image was saved under.
    """
    selected_character = os.path.splitext(os.path.basename(file_path))[0].lower()
    # Handle empty optional tags
    if optional_tags:
        prompt = f"{selected_character}_(genshin impact), 1girl,{optional_tags}, portrait"
    else:
        prompt = f"{selected_character}_(genshin impact), 1girl, portrait"
    negative_prompt = "realistic, photorealistic, low quality, blur"

    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=7.5,
        num_inference_steps=30,
        num_images_per_prompt=1,
    ).images[0]

    fname = f"Avatar_like_{selected_character}.png"
    result.save(fname)
    # Callers parse this exact line to locate the output file.
    print(f"Image saved as {fname}")

    # Clear memory
    del result
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()
    return fname


def main():
    """Match a photo to the closest reference character and render an avatar.

    Command line: ``pipeline.py <test_image> [-t TAGS]``.

    Prints the path of the best-matching reference image, then
    ``Image saved as <file>`` once the generated picture has been written.
    Exits with status 1 when the test image does not exist or when
    ``./GenshinCharacters`` contains no ``.png``/``.jpg`` reference images.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Human to Anime Feature Matcher using DINO and CLIP')
    parser.add_argument('test_image', help='Path to the test image file')
    parser.add_argument('-t', '--optional-tags', dest='optional_tags', default=None,
                        help="Optional tags separated by commas (e.g., 'blonde, green eyes')")
    args = parser.parse_args()

    # Check if test image file exists
    if not os.path.exists(args.test_image):
        print(f"Error: Test image file '{args.test_image}' not found")
        sys.exit(1)

    # Get all PNG and JPG files from GenshinCharacters directory. Checked
    # before any model is loaded so an empty gallery fails fast with a clear
    # message instead of crashing later in argmax. Sorted so ties between
    # equally similar references resolve the same way on every run.
    avatar_files = sorted(
        glob.glob("./GenshinCharacters/*.png") + glob.glob("./GenshinCharacters/*.jpg")
    )
    if not avatar_files:
        print("Error: No reference images (*.png, *.jpg) found in './GenshinCharacters'")
        sys.exit(1)

    # Setup device
    device = _select_device()

    # Find best match
    styletransfer_input = _find_best_match(args.test_image, avatar_files, device)
    # Print exact path only for downstream parsing compatibility
    print(styletransfer_input)

    # The feature extractors went out of scope with _find_best_match; reclaim
    # their memory before the (larger) diffusion pipeline is loaded.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    sd_device = torch.device(device)
    model_id = "xyn-ai/anything-v4.0"
    pipe = StableDiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float32,
        safety_checker=None,
    ).to(sd_device)
    vae = AutoencoderKL.from_pretrained(
        "stabilityai/sd-vae-ft-mse",
        torch_dtype=torch.float32,
    ).to(sd_device)
    pipe.vae = vae
    pipe.enable_attention_slicing("max")  # uses the smallest possible slices (lowest VRAM, slowest)

    _generate_avatar(pipe, styletransfer_input, args.optional_tags)


if __name__ == "__main__":
    main()
pipeline.sh ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Unified pipeline runner
# Usage: ./pipeline.sh <image_path> [-t "optional, tags"]
#
# Validates the arguments, picks a Python interpreter (a local .venv is
# preferred over the system python3), changes into this script's directory
# and runs pipeline.py there. Exits 1 on any usage error.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Print the usage line (callers redirect it to stderr on errors).
usage() {
    echo "Usage: ./pipeline.sh <image_path> [-t \"optional, tags\"]"
}

if [ $# -lt 1 ]; then
    echo "Error: No image path provided" >&2
    usage >&2
    exit 1
fi

IMAGE_PATH="$1"
shift

OPTIONAL_TAGS=""
while [ $# -gt 0 ]; do
    case "$1" in
        -t|--optional-tags)
            # Reject a dangling flag instead of silently ignoring it.
            if [ $# -lt 2 ]; then
                echo "Error: $1 requires a value" >&2
                usage >&2
                exit 1
            fi
            OPTIONAL_TAGS="$2"
            shift 2
            ;;
        *)
            echo "Unknown argument: $1" >&2
            usage >&2
            exit 1
            ;;
    esac
done

if [ ! -f "$IMAGE_PATH" ]; then
    echo "Error: File '$IMAGE_PATH' does not exist" >&2
    exit 1
fi

# Make the path absolute while we are still in the caller's directory;
# after the cd below a relative path would no longer point at the file.
IMAGE_PATH="$(cd -- "$(dirname -- "$IMAGE_PATH")" && pwd)/$(basename -- "$IMAGE_PATH")"

# Prefer local venv if present
PYTHON_BIN="python3"
if [ -x "${SCRIPT_DIR}/.venv/bin/python" ]; then
    PYTHON_BIN="${SCRIPT_DIR}/.venv/bin/python"
elif [ -x "${SCRIPT_DIR}/../.venv/bin/python" ]; then
    PYTHON_BIN="${SCRIPT_DIR}/../.venv/bin/python"
fi

# pipeline.py reads ./GenshinCharacters and writes its output relative to
# the working directory, so run it from the script's own directory.
cd "$SCRIPT_DIR" || exit 1

CMD=("$PYTHON_BIN" "pipeline.py" "$IMAGE_PATH")
if [ -n "$OPTIONAL_TAGS" ]; then
    CMD+=("-t" "$OPTIONAL_TAGS")
fi

echo "Running: ${CMD[*]}"
"${CMD[@]}"
57
+
58
+