Text Generation
Transformers
Diffusers
Safetensors
English
gpt_oss
phillnet-2
gpt-oss
multimodal
image-generation
video-generation
speech
audio
custom-code
conversational
custom_code
Instructions to use ayjays132/Phillnet-2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ayjays132/Phillnet-2 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="ayjays132/Phillnet-2", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)
outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use ayjays132/Phillnet-2 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "ayjays132/Phillnet-2"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ayjays132/Phillnet-2",
    "messages": [
      { "role": "user", "content": "What is the capital of France?" }
    ]
  }'
Use Docker
docker model run hf.co/ayjays132/Phillnet-2
- SGLang
How to use ayjays132/Phillnet-2 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "ayjays132/Phillnet-2" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ayjays132/Phillnet-2",
    "messages": [
      { "role": "user", "content": "What is the capital of France?" }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "ayjays132/Phillnet-2" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "ayjays132/Phillnet-2",
    "messages": [
      { "role": "user", "content": "What is the capital of France?" }
    ]
  }'
- Docker Model Runner
How to use ayjays132/Phillnet-2 with Docker Model Runner:
docker model run hf.co/ayjays132/Phillnet-2
| """Minimal LoRA wrappers + injector for fine-tuning a frozen base model. | |
| LoRALinear / LoRAConv2d: forward = frozen_base(x) + scaling * B(A(x)) | |
| where A: (in -> r), B: (r -> out). A is Kaiming init, B is zero init, | |
| so the wrapped module starts as an exact identity to the base layer. | |
| inject_lora(model, ...) walks ``model.named_modules()`` and replaces target | |
| Linear/Conv2d layers in-place. The original base weights remain on the | |
| module (just .requires_grad_(False)); only the LoRA A/B matrices train. | |
| This is intentionally tiny — no scaling schedules, no rank-stabilization, | |
| no merging. If you need PEFT's full feature set, install peft. For our | |
| single-checkpoint fine-tune use case this is enough. | |
| """ | |
| from __future__ import annotations | |
| from typing import Iterable, List, Optional, Tuple | |
| import torch | |
| import torch.nn as nn | |
class LoRALinear(nn.Module):
    """Wrap a frozen ``nn.Linear`` with a trainable low-rank update.

    The forward pass returns ``base(x) + scaling * lora_B(lora_A(x))`` where
    ``lora_A`` maps ``in_features -> rank`` and ``lora_B`` maps
    ``rank -> out_features``. ``lora_B`` is zero-initialised, so a freshly
    built wrapper reproduces the base layer's output.

    Args:
        base: The linear layer to wrap. Its parameters are frozen in place
            via ``requires_grad_(False)``.
        rank: Rank of the low-rank update; must be positive after ``int()``.
        alpha: Numerator of the scaling factor ``alpha / rank``. When omitted
            it defaults to ``rank``.

    Raises:
        TypeError: If ``base`` is not an ``nn.Linear``.
        ValueError: If ``rank`` is not a positive integer.
    """

    def __init__(self, base: nn.Linear, rank: int, alpha: Optional[float] = None):
        super().__init__()
        if not isinstance(base, nn.Linear):
            raise TypeError(f"LoRALinear expects nn.Linear, got {type(base).__name__}")
        self.rank = int(rank)
        if self.rank <= 0:
            # Guard explicitly: rank 0 would otherwise surface as a bare
            # ZeroDivisionError when computing the scaling factor.
            raise ValueError(f"LoRA rank must be a positive integer, got {rank!r}")

        self.base = base
        for p in self.base.parameters():
            p.requires_grad_(False)

        self.alpha = float(alpha) if alpha is not None else float(rank)
        self.scaling = self.alpha / self.rank

        # Build the adapters where the base weight already lives so forward()
        # works when the base was moved / cast before wrapping. Only copy the
        # dtype when it is floating point: a quantized base may store integer
        # weights, which the trainable adapters must not inherit.
        weight = base.weight
        device = weight.device
        dtype = weight.dtype if weight.is_floating_point() else None
        self.lora_A = nn.Linear(
            base.in_features, self.rank, bias=False, device=device, dtype=dtype
        )
        self.lora_B = nn.Linear(
            self.rank, base.out_features, bias=False, device=device, dtype=dtype
        )
        nn.init.kaiming_uniform_(self.lora_A.weight, a=5 ** 0.5)
        # Zero B so the low-rank branch contributes nothing at initialisation.
        nn.init.zeros_(self.lora_B.weight)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the frozen base output plus the scaled low-rank correction."""
        return self.base(x) + self.lora_B(self.lora_A(x)) * self.scaling
class LoRAConv2d(nn.Module):
    """Wrap a frozen ``nn.Conv2d`` with a trainable rank-``r`` update.

    ``lora_A`` is a 1x1 convolution (``in_channels -> rank``) and ``lora_B``
    reuses the base layer's kernel size, stride, padding and dilation
    (``rank -> out_channels``, ``groups=1``). The forward pass returns
    ``base(x) + scaling * lora_B(lora_A(x))``; ``lora_B`` is zero-initialised,
    so a freshly built wrapper reproduces the base layer's output.

    Args:
        base: The convolution to wrap. Its parameters are frozen in place
            via ``requires_grad_(False)``.
        rank: Rank of the low-rank update; must be positive after ``int()``.
        alpha: Numerator of the scaling factor ``alpha / rank``. When omitted
            it defaults to ``rank``.

    Raises:
        TypeError: If ``base`` is not an ``nn.Conv2d``.
        ValueError: If ``rank`` is not a positive integer.
    """

    def __init__(self, base: nn.Conv2d, rank: int, alpha: Optional[float] = None):
        super().__init__()
        if not isinstance(base, nn.Conv2d):
            raise TypeError(f"LoRAConv2d expects nn.Conv2d, got {type(base).__name__}")
        self.rank = int(rank)
        if self.rank <= 0:
            # Guard explicitly: rank 0 would otherwise surface as a bare
            # ZeroDivisionError when computing the scaling factor.
            raise ValueError(f"LoRA rank must be a positive integer, got {rank!r}")

        self.base = base
        for p in self.base.parameters():
            p.requires_grad_(False)

        self.alpha = float(alpha) if alpha is not None else float(rank)
        self.scaling = self.alpha / self.rank

        # Build the adapters where the base weight already lives so forward()
        # works when the base was moved / cast before wrapping. Only copy the
        # dtype when it is floating point, so integer-typed base weights are
        # never propagated to the trainable adapters.
        weight = base.weight
        device = weight.device
        dtype = weight.dtype if weight.is_floating_point() else None

        # A: pointwise projection down to `rank` channels, spatial size kept.
        self.lora_A = nn.Conv2d(
            base.in_channels,
            self.rank,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
            device=device,
            dtype=dtype,
        )
        # B: mirrors the base geometry so its output shape matches base(x).
        self.lora_B = nn.Conv2d(
            self.rank,
            base.out_channels,
            kernel_size=base.kernel_size,
            stride=base.stride,
            padding=base.padding,
            dilation=base.dilation,
            groups=1,
            bias=False,
            device=device,
            dtype=dtype,
        )
        nn.init.kaiming_uniform_(self.lora_A.weight, a=5 ** 0.5)
        # Zero B so the low-rank branch contributes nothing at initialisation.
        nn.init.zeros_(self.lora_B.weight)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the frozen base output plus the scaled low-rank correction."""
        return self.base(x) + self.lora_B(self.lora_A(x)) * self.scaling
def _module_matches(name: str, patterns: Iterable[str]) -> bool:
    """Return True if at least one entry of ``patterns`` is a substring of ``name``."""
    for fragment in patterns:
        if fragment in name:
            return True
    return False
def inject_lora(
    root: nn.Module,
    target_substrings: Iterable[str],
    rank: int = 16,
    alpha: Optional[float] = None,
    include_linear: bool = True,
    include_conv2d: bool = True,
    skip_substrings: Iterable[str] = (),
) -> Tuple[int, List[str]]:
    """Replace target Linear / Conv2d layers under ``root`` with LoRA wrappers.

    A module is replaced when its qualified name contains at least one of
    ``target_substrings``, contains none of ``skip_substrings`` and does not
    contain ``"text_model"`` (always skipped), and it is an ``nn.Linear`` /
    ``nn.Conv2d`` whose kind is enabled. Replacement is done in place with
    ``setattr`` on the parent module.

    Layers that live inside an existing ``LoRALinear`` / ``LoRAConv2d`` (its
    ``base``, ``lora_A`` and ``lora_B``) are never wrapped, so calling this
    function again on an already-injected model is a no-op for those layers.

    Args:
        root: Module tree to modify in place.
        target_substrings: Substrings selecting modules by qualified name.
            A single ``str`` is treated as one pattern.
        rank: LoRA rank passed to every wrapper.
        alpha: LoRA alpha passed to every wrapper (``None`` -> wrapper default).
        include_linear: Whether ``nn.Linear`` layers may be wrapped.
        include_conv2d: Whether ``nn.Conv2d`` layers may be wrapped.
        skip_substrings: Substrings that exclude a module by qualified name.
            A single ``str`` is treated as one pattern.

    Returns:
        ``(count, names_replaced)``: the number of wrapped layers and their
        qualified names as they were before replacement.
    """
    if not target_substrings:
        return 0, []

    # A bare string is one pattern; iterating it would yield single
    # characters and match almost every module name.
    if isinstance(target_substrings, str):
        target_substrings = [target_substrings]
    if isinstance(skip_substrings, str):
        skip_substrings = [skip_substrings]

    skips = list(skip_substrings) + ["text_model"]
    targets = list(target_substrings)

    # Snapshot first: parents are mutated while we iterate.
    snapshot = list(root.named_modules())

    # Name prefixes owned by existing LoRA wrappers. Anything below them is a
    # wrapper internal and must not be wrapped a second time. The root itself
    # has an empty name, so its prefix is "" (matches every descendant).
    wrapper_types = (LoRALinear, LoRAConv2d)
    wrapper_prefixes = tuple(
        f"{qname}." if qname else ""
        for qname, module in snapshot
        if isinstance(module, wrapper_types)
    )

    replaced: List[str] = []
    for qname, module in snapshot:
        if not qname:
            # The root has no parent to assign into.
            continue
        if qname.startswith(wrapper_prefixes):
            continue
        if _module_matches(qname, skips):
            continue
        if not _module_matches(qname, targets):
            continue

        if include_linear and isinstance(module, nn.Linear):
            new_mod = LoRALinear(module, rank=rank, alpha=alpha)
        elif include_conv2d and isinstance(module, nn.Conv2d):
            new_mod = LoRAConv2d(module, rank=rank, alpha=alpha)
        else:
            continue

        # Rebind the attribute on the parent so the wrapper takes the
        # original layer's place in the module tree.
        parent_path, _, leaf = qname.rpartition(".")
        parent = root.get_submodule(parent_path) if parent_path else root
        setattr(parent, leaf, new_mod)
        replaced.append(qname)

    return len(replaced), replaced
def lora_parameter_count(root: nn.Module) -> int:
    """Return the total element count of all LoRA A/B parameters under ``root``.

    Only ``lora_A`` and ``lora_B`` of ``LoRALinear`` / ``LoRAConv2d`` modules
    are counted; the wrapped base layers are ignored.
    """
    total = 0
    for module in root.modules():
        if not isinstance(module, (LoRALinear, LoRAConv2d)):
            continue
        for adapter in (module.lora_A, module.lora_B):
            for param in adapter.parameters():
                total += param.numel()
    return total