Instructions to use ayjays132/Phillnet-2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use ayjays132/Phillnet-2 with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="ayjays132/Phillnet-2", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use ayjays132/Phillnet-2 with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "ayjays132/Phillnet-2"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/ayjays132/Phillnet-2

SGLang

How to use ayjays132/Phillnet-2 with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "ayjays132/Phillnet-2" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "ayjays132/Phillnet-2" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use ayjays132/Phillnet-2 with Docker Model Runner:
```
docker model run hf.co/ayjays132/Phillnet-2
```

Phillnet-2 / ImageGen /pipeline.py

ayjays132

Upload 470 files

ad2ce18 verified 5 days ago

raw

history blame contribute delete

20.3 kB

	from __future__ import annotations

	import hashlib
	import re
	from collections import OrderedDict
	from dataclasses import dataclass, field
	from pathlib import Path
	from typing import Any, Dict, List, Optional, Union

	import torch
	from PIL import Image, ImageEnhance, ImageFilter
	from transformers import AutoTokenizer

	from .model import UniversalHFTextToImageAdapter


	@dataclass
	class ImageGenPipelineOutput:
	    """Result container returned by ``ImageGenPipeline.__call__``.

	    Only ``images`` is required; every other field has a default.
	    """

	    # Decoded PIL images (the pipeline passes an empty list for tensor-only output).
	    images: List[Image.Image]
	    # Raw generated tensor, when the caller keeps it.
	    tensors: Optional[torch.Tensor] = field(default=None)
	    # Optional conditioning payload; not populated by the pipeline code in this file.
	    conditioning: Optional[Any] = field(default=None)
	    # Prompt (or batch of prompts) associated with the images.
	    prompt: Optional[Union[str, List[str]]] = field(default=None)
	    # Free-form per-call details; each instance gets its own dict.
	    metadata: Dict[str, Any] = field(default_factory=dict)


	class ImageGenPipeline:
	"""
	Hugging Face-style pipeline for the trained ImageGen adapter.

	The pipeline preserves the adapter checkpoint exactly. It loads
	`adapter_model.pt`, routes text through the supplied/shared Qwen text model
	when available, and exposes a `DiffusionPipeline`-like `from_pretrained`
	and `__call__` surface.
	"""

	config_name = "model_index.json"

	def __init__(
	self,
	adapter: UniversalHFTextToImageAdapter,
	tokenizer: Optional[Any] = None,
	text_model: Optional[torch.nn.Module] = None,
	model_dir: Optional[Union[str, Path]] = None,
	):
	self.adapter = adapter
	self.tokenizer = tokenizer
	self.text_model = text_model
	self.model_dir = Path(model_dir) if model_dir is not None else None
	self.sdxl_tokenizer = None
	self.sdxl_tokenizer_2 = None
	self.sdxl_text_encoder = None
	self.sdxl_text_encoder_2 = None
	self.local_text_embedding = None
	self._prompt_cache: "OrderedDict[str, Dict[str, Any]]" = OrderedDict()
	self._prompt_cache_capacity = 32
	if self.text_model is not None:
	self.adapter.text_model = self.text_model
	self.adapter.freeze_text_model()

	@classmethod
	def from_pretrained(
	    cls,
	    model_dir: Union[str, Path],
	    *,
	    text_model: Optional[torch.nn.Module] = None,
	    tokenizer: Optional[Any] = None,
	    device: Optional[Union[str, torch.device]] = None,
	    torch_dtype: Optional[torch.dtype] = None,
	    local_files_only: bool = True,
	    **_: Any,
	) -> "ImageGenPipeline":
	    """Build a pipeline from a directory on disk.

	    Args:
	        model_dir: Directory handed to the adapter's ``from_pretrained``.
	        text_model: Optional text model forwarded to the adapter and the pipeline.
	        tokenizer: Optional tokenizer. When omitted, ``model_dir/tokenizer``
	            is loaded with ``AutoTokenizer`` if that directory exists.
	        device: Device for the adapter; falls back to ``"cpu"``.
	        torch_dtype: Optional dtype the adapter is cast to after loading.
	        local_files_only: Forwarded to ``AutoTokenizer.from_pretrained``.
	        **_: Extra keyword arguments are accepted and ignored.

	    Returns:
	        A new pipeline instance.
	    """
	    root = Path(model_dir)
	    tokenizer_root = root / "tokenizer"
	    # Only look on disk when the caller did not supply a tokenizer.
	    if tokenizer is None and tokenizer_root.exists():
	        tokenizer = AutoTokenizer.from_pretrained(
	            tokenizer_root,
	            use_fast=False,
	            local_files_only=local_files_only,
	            trust_remote_code=True,
	        )

	    loaded = UniversalHFTextToImageAdapter.from_pretrained(
	        root,
	        text_model=text_model,
	        device=device or "cpu",
	    )
	    adapter = loaded if torch_dtype is None else loaded.to(dtype=torch_dtype)
	    return cls(adapter=adapter, tokenizer=tokenizer, text_model=text_model, model_dir=root)

	def to(self, device: Union[str, torch.device], dtype: Optional[torch.dtype] = None) -> "ImageGenPipeline":
	self.adapter.to(device=device)
	if dtype is not None:
	self.adapter.to(dtype=dtype)
	if self.text_model is not None and hasattr(self.text_model, "to"):
	self.text_model.to(device=device)
	return self

	@property
	def device(self) -> torch.device:
	return next(self.adapter.parameters()).device

	@staticmethod
	def _normalize_prompt_text(prompt: str) -> str:
	text = re.sub(r"\s+", " ", str(prompt)).strip()
	if not text:
	return text
	pieces = [part.strip() for part in re.split(r"\s(?:,\|;\|\n)\s", text) if part.strip()]
	if len(pieces) <= 1:
	return text
	deduped: List[str] = []
	seen: set[str] = set()
	for piece in pieces:
	key = re.sub(r"\s+", " ", piece).casefold()
	if key not in seen:
	seen.add(key)
	deduped.append(piece)
	return ", ".join(deduped)

	def _normalize_prompt(self, prompt: Union[str, List[str]]) -> Union[str, List[str]]:
	if isinstance(prompt, str):
	return self._normalize_prompt_text(prompt)
	return [self._normalize_prompt_text(item) for item in prompt]

	def _cache_fingerprint(self, prompt: Union[str, List[str]], encoded: Dict[str, torch.Tensor], **parts: Any) -> str:
	digest = hashlib.sha256()
	digest.update(repr(prompt).encode("utf-8"))
	for key in ("input_ids", "attention_mask"):
	tensor = encoded.get(key)
	if tensor is not None:
	digest.update(key.encode("utf-8"))
	digest.update(str(tuple(tensor.shape)).encode("ascii"))
	digest.update(str(tensor.dtype).encode("ascii"))
	digest.update(tensor.detach().cpu().contiguous().numpy().tobytes())
	for key, value in sorted(parts.items()):
	digest.update(f"{key}={value!r}".encode("utf-8"))
	return digest.hexdigest()

	def _get_prompt_cache(self, key: str) -> Optional[Dict[str, Any]]:
	hit = self._prompt_cache.get(key)
	if hit is None:
	return None
	self._prompt_cache.move_to_end(key)
	return hit

	def _put_prompt_cache(self, key: str, value: Dict[str, Any]) -> None:
	self._prompt_cache[key] = value
	self._prompt_cache.move_to_end(key)
	while len(self._prompt_cache) > self._prompt_cache_capacity:
	self._prompt_cache.popitem(last=False)

	def _tokenize(self, prompt: Union[str, List[str]], max_length: int) -> Dict[str, torch.Tensor]:
	if self.tokenizer is None:
	raise ValueError("Tokenizer is not loaded. Pass tokenizer=... or include ImageGen/tokenizer.")
	prompts = [prompt] if isinstance(prompt, str) else prompt
	pad_tok = self.tokenizer.pad_token or "<\|endoftext\|>"
	prompts = [p if p != "" else pad_tok for p in prompts]
	encoded = self.tokenizer(
	prompts,
	return_tensors="pt",
	padding=True,
	truncation=True,
	max_length=max_length,
	)
	return {k: v.to(self.device) for k, v in encoded.items()}

	def _ensure_local_text_embedding(self) -> torch.nn.Embedding:
	if self.local_text_embedding is not None:
	return self.local_text_embedding
	if self.model_dir is None:
	raise ValueError("No text_model is attached and no model_dir is available for local embeddings.")
	from safetensors.torch import load_file

	root = self.model_dir.parent
	state_path = root / "model.safetensors"
	if not state_path.exists():
	raise ValueError(f"No text_model is attached and local embedding weights are missing: {state_path}")
	state = load_file(str(state_path), device="cpu")
	for key in (
	"model.language_model.embed_tokens.weight",
	"language_model.embed_tokens.weight",
	"model.embed_tokens.weight",
	):
	if key in state:
	dtype = next(self.adapter.parameters()).dtype
	self.local_text_embedding = torch.nn.Embedding.from_pretrained(state[key].to(dtype=dtype), freeze=True)
	self.local_text_embedding.to(device=self.device)
	return self.local_text_embedding
	raise KeyError("Could not find embed_tokens.weight in local model.safetensors.")

	def _resolve_sdxl_text_source(self) -> str:
	if self.model_dir is not None:
	local_text_stack = self.model_dir / "models" / "Phillnet-2-SDXL-TextEncoders"
	if (
	local_text_stack.exists()
	and (local_text_stack / "tokenizer").exists()
	and (local_text_stack / "tokenizer_2").exists()
	and (local_text_stack / "text_encoder").exists()
	and (local_text_stack / "text_encoder_2").exists()
	):
	return str(local_text_stack)
	backend = self.adapter.image_generator
	for attr in ("sdxl_text_encoder_model_name_or_path", "pretrained_unet_model_name_or_path", "vae_model_name_or_path"):
	value = getattr(backend, attr, None)
	if value:
	path = Path(str(value))
	has_text_stack = (
	path.exists()
	and (path / "tokenizer").exists()
	and (path / "tokenizer_2").exists()
	and (path / "text_encoder").exists()
	and (path / "text_encoder_2").exists()
	)
	if has_text_stack:
	return str(path)
	return "stabilityai/sdxl-turbo"

	def _ensure_sdxl_text_stack(self) -> None:
	if self.sdxl_text_encoder is not None and self.sdxl_text_encoder_2 is not None:
	return
	from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer

	model_name = self._resolve_sdxl_text_source()
	self.sdxl_tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder="tokenizer")
	self.sdxl_tokenizer_2 = CLIPTokenizer.from_pretrained(model_name, subfolder="tokenizer_2")
	self.sdxl_text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder="text_encoder")
	self.sdxl_text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
	model_name,
	subfolder="text_encoder_2",
	)
	dtype = next(self.adapter.parameters()).dtype
	self.sdxl_text_encoder.to(device=self.device, dtype=dtype).eval()
	self.sdxl_text_encoder_2.to(device=self.device, dtype=dtype).eval()
	for module in (self.sdxl_text_encoder, self.sdxl_text_encoder_2):
	for param in module.parameters():
	param.requires_grad_(False)

	def _encode_sdxl_prompt(self, prompt: Union[str, List[str]]) -> Dict[str, torch.Tensor]:
	self._ensure_sdxl_text_stack()
	prompts = [prompt] if isinstance(prompt, str) else prompt

	def encode(tokenizer: Any, encoder: torch.nn.Module) -> Any:
	tokens = tokenizer(
	prompts,
	padding="max_length",
	max_length=tokenizer.model_max_length,
	truncation=True,
	return_tensors="pt",
	)
	return encoder(tokens.input_ids.to(self.device), output_hidden_states=True)

	out_1 = encode(self.sdxl_tokenizer, self.sdxl_text_encoder)
	out_2 = encode(self.sdxl_tokenizer_2, self.sdxl_text_encoder_2)
	pooled = out_2.text_embeds if hasattr(out_2, "text_embeds") else out_2[0]
	return {
	"prompt_embeds": torch.cat([out_1.hidden_states[-2], out_2.hidden_states[-2]], dim=-1),
	"pooled_prompt_embeds": pooled,
	}

	@staticmethod
	def _tensor_to_pil(images: torch.Tensor) -> List[Image.Image]:
	images = images.detach().float().cpu().clamp(0, 1)
	if images.ndim == 3:
	images = images.unsqueeze(0)
	if images.shape[1] not in (1, 3, 4):
	raise ValueError(f"Expected image tensor [B,C,H,W], got {tuple(images.shape)}")
	if images.shape[1] == 1:
	images = images.repeat(1, 3, 1, 1)
	if images.shape[1] == 4:
	images = images[:, :3]
	images = (images.permute(0, 2, 3, 1).numpy() * 255).round().astype("uint8")
	return [Image.fromarray(image) for image in images]

	@staticmethod
	def _polish_image(image: Image.Image, strength: float = 0.22) -> Image.Image:
	strength = max(0.0, min(float(strength), 1.0))
	if strength <= 0.0:
	return image
	base = image.convert("RGB")
	denoised = base.filter(ImageFilter.MedianFilter(size=3))
	blended = Image.blend(base, denoised, strength)
	blended = ImageEnhance.Sharpness(blended).enhance(1.08)
	blended = ImageEnhance.Contrast(blended).enhance(1.03)
	return blended

	@torch.no_grad()
	def __call__(
	    self,
	    prompt: Union[str, List[str]],
	    *,
	    negative_prompt: Optional[Union[str, List[str]]] = None,
	    height: int = 512,
	    width: int = 512,
	    num_inference_steps: Optional[int] = None,
	    guidance_scale: float = 0.0,
	    seed: Optional[int] = None,
	    generation_strategy: str = "prior",
	    refinement_steps: int = 2,
	    quality_strength: float = 1.0,
	    contract_strength: float = 0.0,
	    contract_maps: Optional[torch.Tensor] = None,
	    refiner_lora_strength: float = 0.0,
	    latent_refiner_strength: float = 0.0,
	    structure_prior_strength: float = 0.0,
	    reference_pass_steps: int = 0,
	    reference_latent_strength: float = 0.75,
	    image_quality_polish: bool = False,
	    image_quality_polish_strength: float = 0.22,
	    output_type: str = "pil",
	    return_dict: bool = True,
	    **kwargs: Any,
	) -> Union[ImageGenPipelineOutput, List[Image.Image], torch.Tensor]:
	    """Generate images for ``prompt`` with the attached adapter.

	    The prompt is normalized and tokenized, then turned into input
	    embeddings from the prompt cache, the attached text model, or the local
	    embedding table. Generation then follows ``generation_strategy``.

	    Args:
	        prompt: One prompt or a batch of prompts.
	        negative_prompt: Only used when ``guidance_scale > 1.0``; defaults
	            to empty prompt(s) matching ``prompt``.
	        height, width: Requested output size, forwarded to the adapter.
	        num_inference_steps: Step count; a falsy value selects
	            ``adapter.image_generator.default_inference_steps``.
	        guidance_scale: Values above 1.0 enable negative conditioning.
	        seed: Forwarded to ``adapter.generate`` (diffusion strategy).
	        generation_strategy: ``"prior"``/``"text_prior"``/``"condition"``
	            call ``adapter.condition_to_image``; ``"diffusion"``/
	            ``"latent_diffusion"`` call ``adapter.generate``.
	        refinement_steps, refiner_lora_strength: Forwarded to
	            ``condition_to_image`` only.
	        quality_strength, contract_strength, contract_maps,
	        latent_refiner_strength, structure_prior_strength: Forwarded to
	            whichever adapter method runs.
	        reference_pass_steps: When positive (diffusion strategy only), a
	            first ``adapter.generate`` pass with that many steps produces
	            latents used as ``init_latents`` for the main pass.
	        reference_latent_strength: Passed as ``init_latent_strength``.
	        image_quality_polish: Apply ``_polish_image`` to PIL outputs.
	        image_quality_polish_strength: Strength for that polish step.
	        output_type: ``"pt"`` returns the raw tensor; anything else yields
	            PIL images.
	        return_dict: Wrap the result in ``ImageGenPipelineOutput``.
	        **kwargs: ``use_memory`` (default ``True``) is read from here; all
	            extras are also merged into the ``adapter.generate`` arguments.

	    Returns:
	        An ``ImageGenPipelineOutput`` when ``return_dict`` is true,
	        otherwise the tensor (``output_type="pt"``) or the list of images.

	    Raises:
	        ValueError: If ``generation_strategy`` is not recognised, or no
	            tokenizer is attached.
	    """
	    steps = int(num_inference_steps or self.adapter.image_generator.default_inference_steps)
	    original_prompt = prompt
	    prompt = self._normalize_prompt(prompt)
	    prompt_was_normalized = prompt != original_prompt
	    encoded = self._tokenize(prompt, max_length=int(self.adapter.max_condition_tokens))
	    strategy = generation_strategy.lower().strip()
	    use_memory = kwargs.get("use_memory", True)
	    # The key covers everything that changes the cached tensors.
	    cache_key = self._cache_fingerprint(
	        prompt,
	        encoded,
	        strategy=strategy,
	        use_memory=use_memory,
	        device=str(self.device),
	        dtype=str(next(self.adapter.parameters()).dtype),
	        text_model_attached=self.text_model is not None,
	    )
	    cached_prompt = self._get_prompt_cache(cache_key)
	    call_kwargs: Dict[str, Any] = {
	        "attention_mask": encoded.get("attention_mask"),
	        "height": height,
	        "width": width,
	        "steps": steps,
	        "guidance_scale": guidance_scale,
	        "seed": seed,
	        **kwargs,
	    }
	    cache_hit = cached_prompt is not None
	    if cached_prompt is not None and "inputs_embeds" in cached_prompt:
	        call_kwargs["inputs_embeds"] = cached_prompt["inputs_embeds"].to(self.device)
	    elif self.text_model is not None and hasattr(self.text_model, "get_input_embeddings"):
	        input_embeddings = self.text_model.get_input_embeddings()(encoded["input_ids"])
	        call_kwargs["inputs_embeds"] = input_embeddings
	    else:
	        call_kwargs["inputs_embeds"] = self._ensure_local_text_embedding()(encoded["input_ids"])

	    if guidance_scale > 1.0:
	        if negative_prompt is None:
	            if isinstance(prompt, str):
	                negative_prompt = ""
	            else:
	                negative_prompt = [""] * len(prompt)
	        negative_prompt = self._normalize_prompt(negative_prompt)
	        neg_encoded = self._tokenize(negative_prompt, max_length=int(self.adapter.max_condition_tokens))
	        neg_kwargs = {
	            "attention_mask": neg_encoded.get("attention_mask"),
	            "use_memory": False,
	        }
	        if self.text_model is not None and hasattr(self.text_model, "get_input_embeddings"):
	            neg_input_embeddings = self.text_model.get_input_embeddings()(neg_encoded["input_ids"])
	            neg_kwargs["inputs_embeds"] = neg_input_embeddings
	        else:
	            neg_kwargs["inputs_embeds"] = self._ensure_local_text_embedding()(neg_encoded["input_ids"])

	        # NOTE(review): this is only forwarded through call_kwargs, which the
	        # "prior" branch below does not pass to condition_to_image.
	        negative_conditioning = self.adapter.encode_inputs(**neg_kwargs)
	        call_kwargs["negative_conditioning"] = negative_conditioning

	    # Stays None unless the diffusion branch actually runs a reference pass.
	    reference_latents = None
	    if strategy in {"prior", "text_prior", "condition"}:
	        condition_kwargs = {
	            "attention_mask": call_kwargs["attention_mask"],
	            "height": height,
	            "width": width,
	            "refinement_steps": refinement_steps,
	            "quality_strength": quality_strength,
	            "contract_strength": contract_strength,
	            "contract_maps": contract_maps,
	            "refiner_lora_strength": refiner_lora_strength,
	            "latent_refiner_strength": latent_refiner_strength,
	            "structure_prior_strength": structure_prior_strength,
	            "use_memory": use_memory,
	        }
	        if "inputs_embeds" in call_kwargs:
	            condition_kwargs["inputs_embeds"] = call_kwargs["inputs_embeds"]
	        else:
	            condition_kwargs["input_ids"] = encoded["input_ids"]
	        generated = self.adapter.condition_to_image(**condition_kwargs)
	    elif strategy in {"diffusion", "latent_diffusion"}:
	        if cached_prompt is not None and "sdxl_conditioning" in cached_prompt:
	            call_kwargs["sdxl_conditioning"] = cached_prompt["sdxl_conditioning"]
	        else:
	            call_kwargs["sdxl_conditioning"] = self._encode_sdxl_prompt(prompt)
	        if guidance_scale > 1.0:
	            call_kwargs["negative_sdxl_conditioning"] = self._encode_sdxl_prompt(negative_prompt)
	        if int(reference_pass_steps) > 0:
	            # Short first pass whose latents seed the main generation.
	            reference_kwargs = dict(call_kwargs)
	            reference_kwargs["steps"] = int(reference_pass_steps)
	            reference_latents = self.adapter.generate(
	                **reference_kwargs,
	                return_latents=True,
	                quality_strength=quality_strength,
	                contract_strength=contract_strength,
	                contract_maps=contract_maps,
	                latent_refiner_strength=latent_refiner_strength,
	                structure_prior_strength=structure_prior_strength,
	            )
	        generated = self.adapter.generate(
	            **call_kwargs,
	            quality_strength=quality_strength,
	            contract_strength=contract_strength,
	            contract_maps=contract_maps,
	            init_latents=reference_latents,
	            init_latent_strength=reference_latent_strength,
	            latent_refiner_strength=latent_refiner_strength,
	            structure_prior_strength=structure_prior_strength,
	        )
	    else:
	        raise ValueError(
	            "generation_strategy must be 'prior' or 'diffusion', "
	            f"got {generation_strategy!r}."
	        )
	    self._put_prompt_cache(
	        cache_key,
	        {
	            "inputs_embeds": call_kwargs["inputs_embeds"].detach(),
	            **({"sdxl_conditioning": call_kwargs["sdxl_conditioning"]} if "sdxl_conditioning" in call_kwargs else {}),
	        },
	    )
	    metadata = {
	        "prompt": prompt,
	        "original_prompt": original_prompt,
	        "prompt_was_normalized": prompt_was_normalized,
	        "prompt_cache_hit": cache_hit,
	        "prompt_cache_key": cache_key,
	        "prompt_cache_entries": len(self._prompt_cache),
	        "use_memory": use_memory,
	        "generation_strategy": strategy,
	        "reference_pass_steps": int(reference_pass_steps),
	        "reference_latent_strength": reference_latent_strength,
	        # True only when a reference pass actually produced latents; the
	        # "prior" strategies never run one regardless of reference_pass_steps.
	        "used_reference_latents": reference_latents is not None,
	        "image_quality_polish": bool(image_quality_polish),
	        "image_quality_polish_strength": image_quality_polish_strength if image_quality_polish else 0.0,
	    }
	    if output_type == "pt":
	        return ImageGenPipelineOutput(images=[], tensors=generated, prompt=prompt, metadata=metadata) if return_dict else generated
	    images = self._tensor_to_pil(generated)
	    if image_quality_polish:
	        images = [self._polish_image(image, image_quality_polish_strength) for image in images]
	    return ImageGenPipelineOutput(images=images, tensors=generated, prompt=prompt, metadata=metadata) if return_dict else images