Instructions to use ayjays132/Phillnet-2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use ayjays132/Phillnet-2 with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

# NOTE(review): trust_remote_code=True presumably allows custom Python code
# shipped in the model repository to run locally — confirm the repo is trusted
# before enabling it.
pipe = pipeline("text-generation", model="ayjays132/Phillnet-2", trust_remote_code=True)
# Chat-style input: a list of message dicts, each with "role" and "content" keys.
messages = [
    {"role": "user", "content": "Who are you?"},
]
# Run the pipeline on the conversation; the snippet does not capture the result.
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# NOTE(review): trust_remote_code=True presumably allows custom Python code
# shipped in the model repository to run locally — confirm the repo is trusted
# before enabling it.
tokenizer = AutoTokenizer.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
# Chat-style input: a list of message dicts, each with "role" and "content" keys.
messages = [
    {"role": "user", "content": "Who are you?"},
]
# Render the conversation through the tokenizer's chat template, request
# PyTorch tensors ("pt") in dict form, and move them to the model's device.
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

# Generate at most 40 new tokens for the prompt.
outputs = model.generate(**inputs, max_new_tokens=40)
# Skip the first `input_ids.shape[-1]` positions of the first output sequence
# (the prompt length) so that only the tokens after the prompt are decoded.
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use ayjays132/Phillnet-2 with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "ayjays132/Phillnet-2"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/ayjays132/Phillnet-2

SGLang

How to use ayjays132/Phillnet-2 with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "ayjays132/Phillnet-2" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "ayjays132/Phillnet-2" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use ayjays132/Phillnet-2 with Docker Model Runner:
```
docker model run hf.co/ayjays132/Phillnet-2
```

Phillnet-2 / ImageGen /model /lora.py

ayjays132

Upload 478 files

101858b verified 2 days ago

raw

history blame contribute delete

5.08 kB

	"""Minimal LoRA wrappers + injector for fine-tuning a frozen base model.

	LoRALinear / LoRAConv2d: forward = frozen_base(x) + scaling * B(A(x))
	where A: (in -> r), B: (r -> out). A is Kaiming-uniform initialized and B is
	zero-initialized, so the wrapped module initially produces exactly the same
	output as the base layer.

	inject_lora(model, ...) walks ``model.named_modules()`` and replaces target
	Linear/Conv2d layers in-place. The original base weights remain on the
	module (just .requires_grad_(False)); only the LoRA A/B matrices train.

	This is intentionally tiny — no scaling schedules, no rank-stabilization,
	no merging. If you need PEFT's full feature set, install peft. For our
	single-checkpoint fine-tune use case this is enough.
	"""

	from __future__ import annotations

	from typing import Iterable, List, Optional, Tuple

	import torch
	import torch.nn as nn


	class LoRALinear(nn.Module):
	    """Wrap a frozen ``nn.Linear`` with a trainable low-rank update.

	    ``forward(x) = base(x) + scaling * lora_B(lora_A(x))`` where ``lora_A``
	    maps ``in_features -> rank`` and ``lora_B`` maps ``rank -> out_features``.
	    ``lora_B`` is zero-initialized, so right after construction the wrapper
	    returns the same values as ``base``.

	    Args:
	        base: Linear layer to wrap. Its parameters are frozen in place via
	            ``requires_grad_(False)`` and the module is kept as ``self.base``.
	        rank: Inner dimension of the low-rank pair; must be >= 1.
	        alpha: Numerator of the scaling factor ``alpha / rank``. When omitted
	            it defaults to ``rank``, giving a scaling of 1.0.

	    Raises:
	        TypeError: If ``base`` is not an ``nn.Linear``.
	        ValueError: If ``rank`` is smaller than 1.
	    """

	    def __init__(self, base: nn.Linear, rank: int, alpha: Optional[float] = None):
	        super().__init__()
	        if not isinstance(base, nn.Linear):
	            raise TypeError(f"LoRALinear expects nn.Linear, got {type(base).__name__}")
	        rank = int(rank)
	        # Validate before touching ``base``: a non-positive rank would otherwise
	        # surface as an opaque ZeroDivisionError when computing the scaling.
	        if rank < 1:
	            raise ValueError(f"LoRALinear rank must be >= 1, got {rank}")

	        self.base = base
	        for p in self.base.parameters():
	            p.requires_grad_(False)

	        self.rank = rank
	        self.alpha = float(alpha) if alpha is not None else float(rank)
	        self.scaling = self.alpha / self.rank

	        # Create the adapter on the base layer's device so that wrapping a
	        # model that already lives on an accelerator works without an extra
	        # ``.to(device)`` call.
	        device = base.weight.device
	        self.lora_A = nn.Linear(base.in_features, self.rank, bias=False, device=device)
	        self.lora_B = nn.Linear(self.rank, base.out_features, bias=False, device=device)
	        nn.init.kaiming_uniform_(self.lora_A.weight, a=5 ** 0.5)
	        # Zero B so the low-rank branch contributes nothing at initialization.
	        nn.init.zeros_(self.lora_B.weight)

	    def forward(self, x: torch.Tensor) -> torch.Tensor:
	        """Return the frozen base output plus the scaled low-rank update."""
	        return self.base(x) + self.lora_B(self.lora_A(x)) * self.scaling


	class LoRAConv2d(nn.Module):
	    """Wrap a frozen ``nn.Conv2d`` with a trainable rank-r convolutional update.

	    ``forward(x) = base(x) + scaling * lora_B(lora_A(x))`` where ``lora_A`` is
	    a 1x1 convolution (``in_channels -> rank``) and ``lora_B`` reuses the base
	    layer's kernel size, stride, padding and dilation (``rank -> out_channels``)
	    so both branches produce outputs of the same spatial size. ``lora_B`` is
	    zero-initialized, so right after construction the wrapper returns the
	    same values as ``base``.

	    Args:
	        base: Convolution to wrap. Its parameters are frozen in place via
	            ``requires_grad_(False)`` and the module is kept as ``self.base``.
	        rank: Number of intermediate channels; must be >= 1.
	        alpha: Numerator of the scaling factor ``alpha / rank``. When omitted
	            it defaults to ``rank``, giving a scaling of 1.0.

	    Raises:
	        TypeError: If ``base`` is not an ``nn.Conv2d``.
	        ValueError: If ``rank`` is smaller than 1.
	    """

	    def __init__(self, base: nn.Conv2d, rank: int, alpha: Optional[float] = None):
	        super().__init__()
	        if not isinstance(base, nn.Conv2d):
	            raise TypeError(f"LoRAConv2d expects nn.Conv2d, got {type(base).__name__}")
	        rank = int(rank)
	        # Validate before touching ``base``: a non-positive rank would otherwise
	        # surface as an opaque ZeroDivisionError when computing the scaling.
	        if rank < 1:
	            raise ValueError(f"LoRAConv2d rank must be >= 1, got {rank}")

	        self.base = base
	        for p in self.base.parameters():
	            p.requires_grad_(False)

	        self.rank = rank
	        self.alpha = float(alpha) if alpha is not None else float(rank)
	        self.scaling = self.alpha / self.rank

	        # Create the adapter on the base layer's device so that wrapping a
	        # model that already lives on an accelerator works without an extra
	        # ``.to(device)`` call.
	        device = base.weight.device
	        # A: pointwise channel reduction; it leaves the spatial size untouched.
	        self.lora_A = nn.Conv2d(
	            base.in_channels, self.rank,
	            kernel_size=1, stride=1, padding=0, bias=False,
	            device=device,
	        )
	        # B: mirrors the base geometry so its output matches base(x) in shape.
	        # groups is fixed to 1 regardless of the base layer's grouping.
	        self.lora_B = nn.Conv2d(
	            self.rank, base.out_channels,
	            kernel_size=base.kernel_size,
	            stride=base.stride,
	            padding=base.padding,
	            dilation=base.dilation,
	            groups=1,
	            bias=False,
	            device=device,
	        )
	        nn.init.kaiming_uniform_(self.lora_A.weight, a=5 ** 0.5)
	        # Zero B so the low-rank branch contributes nothing at initialization.
	        nn.init.zeros_(self.lora_B.weight)

	    def forward(self, x: torch.Tensor) -> torch.Tensor:
	        """Return the frozen base output plus the scaled low-rank update."""
	        return self.base(x) + self.lora_B(self.lora_A(x)) * self.scaling


	def _module_matches(name: str, patterns: Iterable[str]) -> bool:
	return any(p in name for p in patterns)


	def inject_lora(
	    root: nn.Module,
	    target_substrings: Iterable[str],
	    rank: int = 16,
	    alpha: Optional[float] = None,
	    include_linear: bool = True,
	    include_conv2d: bool = True,
	    skip_substrings: Iterable[str] = (),
	) -> Tuple[int, List[str]]:
	    """Replace target Linear / Conv2d layers under ``root`` with LoRA wrappers.

	    A module is replaced when its qualified name (as produced by
	    ``root.named_modules()``) contains at least one target substring, contains
	    none of the skip substrings, and the module is an ``nn.Linear`` /
	    ``nn.Conv2d`` of an enabled kind. ``root`` itself is never replaced.

	    Args:
	        root: Model to modify in place.
	        target_substrings: Substrings selecting modules by qualified name. A
	            bare string is treated as a single pattern, not as a sequence of
	            characters.
	        rank: LoRA rank passed to every wrapper.
	        alpha: LoRA alpha passed to every wrapper (``None`` lets the wrapper
	            pick its default).
	        include_linear: Wrap matching ``nn.Linear`` modules.
	        include_conv2d: Wrap matching ``nn.Conv2d`` modules.
	        skip_substrings: Substrings that exclude a module by qualified name.
	            ``"text_model"`` is always added to this list, so any module whose
	            qualified name contains ``"text_model"`` is skipped. A bare string
	            is treated as a single pattern.

	    Returns:
	        ``(count, names_replaced)`` where ``names_replaced`` lists the
	        qualified names of the replaced modules in traversal order.

	    The walk snapshots ``named_modules()`` first so parents can be mutated
	    during iteration. Modules that already are LoRA wrappers, and everything
	    nested inside them (``base``, ``lora_A``, ``lora_B``), are left alone, so
	    calling this function again does not stack adapters on top of adapters.
	    """
	    targets = _as_pattern_list(target_substrings)
	    if not targets:
	        return 0, []

	    skips = _as_pattern_list(skip_substrings) + ["text_model"]
	    snapshot = list(root.named_modules())
	    replaced: List[str] = []
	    # Name prefixes of wrappers that existed before this call. The traversal
	    # yields a parent before its children, so a wrapper's prefix is recorded
	    # before any of its children are visited.
	    wrapper_prefixes: List[str] = []

	    for qname, module in snapshot:
	        if isinstance(module, (LoRALinear, LoRAConv2d)):
	            # An empty prefix (root itself is a wrapper) matches every name.
	            wrapper_prefixes.append(f"{qname}." if qname else "")
	            continue
	        if not qname:
	            continue
	        if any(qname.startswith(prefix) for prefix in wrapper_prefixes):
	            continue
	        if _module_matches(qname, skips):
	            continue
	        if not _module_matches(qname, targets):
	            continue

	        if include_linear and isinstance(module, nn.Linear):
	            new_mod = LoRALinear(module, rank=rank, alpha=alpha)
	        elif include_conv2d and isinstance(module, nn.Conv2d):
	            new_mod = LoRAConv2d(module, rank=rank, alpha=alpha)
	        else:
	            continue

	        # Rebind the attribute on the owning parent so the wrapper takes the
	        # original module's place in the tree.
	        parent_path, _, leaf = qname.rpartition(".")
	        parent = root.get_submodule(parent_path) if parent_path else root
	        setattr(parent, leaf, new_mod)
	        replaced.append(qname)

	    return len(replaced), replaced


	def _as_pattern_list(patterns: Iterable[str]) -> List[str]:
	    """Materialize ``patterns`` as a list, treating a bare string as one pattern."""
	    if isinstance(patterns, str):
	        return [patterns]
	    return list(patterns)


	def lora_parameter_count(root: nn.Module) -> int:
	    """Return the total number of elements in the LoRA A/B parameters under ``root``.

	    Only modules that are ``LoRALinear`` or ``LoRAConv2d`` instances contribute;
	    the parameters of their wrapped base layers are not counted.
	    """
	    wrappers = (
	        candidate
	        for candidate in root.modules()
	        if isinstance(candidate, (LoRALinear, LoRAConv2d))
	    )
	    return sum(
	        param.numel()
	        for wrapper in wrappers
	        for branch in (wrapper.lora_A, wrapper.lora_B)
	        for param in branch.parameters()
	    )