Instructions to use ayjays132/Phillnet-2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use ayjays132/Phillnet-2 with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="ayjays132/Phillnet-2", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("ayjays132/Phillnet-2", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use ayjays132/Phillnet-2 with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "ayjays132/Phillnet-2"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/ayjays132/Phillnet-2

SGLang

How to use ayjays132/Phillnet-2 with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "ayjays132/Phillnet-2" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "ayjays132/Phillnet-2" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "ayjays132/Phillnet-2",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use ayjays132/Phillnet-2 with Docker Model Runner:
```
docker model run hf.co/ayjays132/Phillnet-2
```

Phillnet-2 / memory_optimization /tensor_pool.py

ayjays132

Upload 478 files

101858b verified 2 days ago

raw

history blame contribute delete

3.58 kB

	"""
	Tensor Pool Module
	Unified tensor pooling system for memory efficiency.
	"""
	import torch
	import logging
	from typing import Dict, Tuple, List
	from collections import defaultdict

	logger = logging.getLogger(__name__)

	class TensorPool:
	"""
	Unified tensor pool for efficient memory management.
	"""
	def __init__(self, max_pool_size: int = 50, max_tensor_size: int = 1000000):
	self.max_pool_size = max_pool_size
	self.max_tensor_size = max_tensor_size
	self.pools = defaultdict(list)
	self.usage_stats = defaultdict(int)
	self.operation_count = 0

	logger.debug("TensorPool initialized")

	def get_tensor(self, shape: Tuple[int, ...], dtype: torch.dtype = torch.float32,
	requires_grad: bool = False, device: torch.device = None) -> torch.Tensor:
	"""
	Get tensor from pool or create new one.

	Args:
	shape: Tensor shape
	dtype: Tensor data type
	requires_grad: Whether tensor requires gradients
	device: Device to create tensor on

	Returns:
	Tensor from pool or newly created tensor
	"""
	self.operation_count += 1
	key = (shape, dtype, requires_grad)

	# Try to get tensor from pool
	if key in self.pools and self.pools[key]:
	tensor = self.pools[key].pop()
	tensor.zero_() # Clear tensor
	self.usage_stats[key] += 1
	return tensor.to(device) if device else tensor

	# Create new tensor
	if device is None:
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	tensor = torch.zeros(shape, dtype=dtype, device=device, requires_grad=requires_grad)
	self.usage_stats[key] += 1

	return tensor

	def return_tensor(self, tensor: torch.Tensor) -> None:
	"""
	Return tensor to pool for reuse.

	Args:
	tensor: Tensor to return to pool
	"""
	if tensor is None or not isinstance(tensor, torch.Tensor):
	return

	# Don't pool very large tensors
	if tensor.numel() > self.max_tensor_size:
	return

	key = (tuple(tensor.shape), tensor.dtype, tensor.requires_grad)

	# Only pool if we have space
	if len(self.pools[key]) < self.max_pool_size:
	tensor.detach_()
	self.pools[key].append(tensor)

	def clear_pool(self, keep_ratio: float = 0.5) -> None:
	"""
	Clear tensor pool, keeping a percentage.

	Args:
	keep_ratio: Ratio of pool to keep (0.0 to 1.0)
	"""
	for key, pool in self.pools.items():
	if len(pool) > self.max_pool_size * keep_ratio:
	excess = len(pool) - int(self.max_pool_size * keep_ratio)
	for _ in range(excess):
	if pool:
	pool.pop()

	def clear_all(self) -> None:
	"""Clear all tensor pools."""
	self.pools.clear()
	self.usage_stats.clear()
	logger.debug("TensorPool cleared")

	def get_stats(self) -> Dict:
	"""Get pool statistics."""
	return {
	'pools': {str(k): len(v) for k, v in self.pools.items()},
	'usage_stats': dict(self.usage_stats),
	'operation_count': self.operation_count
	}