Instructions to use bdbj/Dream-Coder-v0-Instruct-7B-SM with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use bdbj/Dream-Coder-v0-Instruct-7B-SM with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="bdbj/Dream-Coder-v0-Instruct-7B-SM", trust_remote_code=True)
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("bdbj/Dream-Coder-v0-Instruct-7B-SM", trust_remote_code=True, dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use bdbj/Dream-Coder-v0-Instruct-7B-SM with vLLM:

Install from pip and serve the model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "bdbj/Dream-Coder-v0-Instruct-7B-SM"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "bdbj/Dream-Coder-v0-Instruct-7B-SM",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/bdbj/Dream-Coder-v0-Instruct-7B-SM

SGLang

How to use bdbj/Dream-Coder-v0-Instruct-7B-SM with SGLang:

Install from pip and serve the model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "bdbj/Dream-Coder-v0-Instruct-7B-SM" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "bdbj/Dream-Coder-v0-Instruct-7B-SM",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "bdbj/Dream-Coder-v0-Instruct-7B-SM" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "bdbj/Dream-Coder-v0-Instruct-7B-SM",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use bdbj/Dream-Coder-v0-Instruct-7B-SM with Docker Model Runner:
```
docker model run hf.co/bdbj/Dream-Coder-v0-Instruct-7B-SM
```

Dream-Coder-v0-Instruct-7B-SM / softmasking_utils.py

bdbj

Upload folder using huggingface_hub

77d04c3 verified 4 months ago

raw

history blame contribute delete

4.91 kB

	import torch
	import torch.nn.functional as F
	from dataclasses import dataclass

	@dataclass
	class SMArgs:
	    """Configuration bundle for Softmasking.

	    Attributes:
	        sm_alg: Mixing algorithm; "mixinputs_with_topk" or
	            "mixinputs_with_temp" ("none" is the default).
	        sm_schedule: How the mixing strength is scheduled; one of "none",
	            "linear" or "stepwise".
	        scale: Overall mixing strength; 0 disables mixing.
	        steepness: Steepness of the sigmoid that maps entropy to lambda.
	        offset: Offset of the sigmoid that maps entropy to lambda.
	        mixinputs_k: Number of top tokens kept; only read when
	            ``sm_alg == "mixinputs_with_topk"``.
	        mixinputs_temp: Softmax temperature; only read when
	            ``sm_alg == "mixinputs_with_temp"``.
	    """

	    # --- algorithm and schedule selection ---
	    sm_alg: str = "none"
	    sm_schedule: str = "none"

	    # --- parameters of the entropy -> lambda sigmoid ---
	    scale: float = 0.0
	    steepness: float = 0.0
	    offset: float = 0.0

	    # --- algorithm-specific knobs ---
	    mixinputs_k: int = 3  # top-k variant only
	    mixinputs_temp: float = 1.0  # temperature variant only

	def get_mixing_factors_for_softmasking(input_ids, logits_prelim, mask_token_id, max_gen_length, sm_args):
	    """Compute the Softmasking output distribution for every position.

	    The result is a convex combination of the one-hot encoding of
	    ``input_ids`` and a predicted distribution derived from
	    ``logits_prelim``. The per-position weight (lambda) comes from the
	    negative entropy of the prediction and is non-zero only where
	    ``input_ids == mask_token_id``.

	    Args:
	        input_ids: Integer token ids (assumed (B, T) -- TODO confirm with caller).
	        logits_prelim: Preliminary logits whose last dimension is the
	            vocabulary (assumed (B, T, V) -- TODO confirm with caller).
	        mask_token_id: Id of the mask token.
	        max_gen_length: Generation length used to normalise decoding progress
	            when a schedule is active.
	        sm_args: Object exposing ``sm_alg``, ``sm_schedule``, ``scale``,
	            ``steepness``, ``offset``, ``mixinputs_k`` and ``mixinputs_temp``.

	    Returns:
	        Tensor ``(1 - lambda) * one_hot(input_ids) + lambda * p``.
	    """
	    # Computed once; reused for the schedule and for the lambda masking.
	    mask_positions = input_ids == mask_token_id

	    # One-hot distribution of the current input `xt`.
	    xt_one_hot = F.one_hot(input_ids, num_classes=logits_prelim.shape[-1]).to(logits_prelim.dtype)

	    # The temperature only applies to the "mixinputs_with_temp" algorithm.
	    temperature = sm_args.mixinputs_temp if sm_args.sm_alg == "mixinputs_with_temp" else 1.0
	    neg_entropy, p = get_neg_entropy_and_probabilities(logits_prelim, temperature=temperature)

	    # Optionally make the mixing strength depend on decoding progress.
	    if sm_args.sm_schedule != "none":
	        # NOTE(review): the count is taken over the whole tensor, so with
	        # several sequences in a batch it can exceed max_gen_length -- confirm
	        # this is intended.
	        scale = get_time_dependence(
	            max_gen_length=max_gen_length,
	            num_mask_token=mask_positions.sum().item(),
	            scale=sm_args.scale,
	            schedule=sm_args.sm_schedule,
	        )
	    else:
	        scale = sm_args.scale

	    lambda_tensor = calculate_lambda_tensor(
	        neg_entropy, mask_positions, scale, sm_args.steepness, sm_args.offset
	    )
	    # The zero-scale shortcut of calculate_lambda_tensor may return lambda
	    # without the trailing vocab axis; add it so the mix below broadcasts.
	    if lambda_tensor.dim() == xt_one_hot.dim() - 1:
	        lambda_tensor = lambda_tensor.unsqueeze(-1)

	    if sm_args.sm_alg == "mixinputs_with_topk":
	        # Replace the full softmax by a distribution supported on the top-k tokens.
	        p = get_only_topk_probs(logits_prelim, sm_args.mixinputs_k)

	    # Convex combination of the current input and the prediction.
	    return (1 - lambda_tensor) * xt_one_hot + lambda_tensor * p

	def get_neg_entropy_and_probabilities(logits, temperature=1.0):
	    """Return ``(neg_entropy, probs)`` for temperature-scaled logits.

	    ``probs`` is the softmax of ``logits / temperature`` over the last
	    dimension. ``neg_entropy`` is ``sum(probs * log(probs + 1e-10))`` over
	    that same dimension, so it has one dimension fewer than ``logits``.
	    """
	    eps = 1e-10  # keeps log() finite when a probability is exactly zero
	    scaled_logits = logits / temperature
	    probs = torch.softmax(scaled_logits, dim=-1)
	    log_probs = torch.log(probs + eps)
	    return (probs * log_probs).sum(dim=-1), probs

	def calculate_lambda_tensor(neg_entropy, mask_positions, scale, steepness, offset):
	    """Map negative entropy to per-position mixing weights (lambda).

	    Lambda is ``scale * sigmoid(steepness * (neg_entropy - offset))`` at
	    positions where ``mask_positions`` is true and 0 elsewhere.

	    Args:
	        neg_entropy: Tensor of negative entropies, or ``None``.
	        mask_positions: Boolean tensor with the same shape as ``neg_entropy``.
	        scale: Upper bound of lambda; ``0`` disables mixing.
	        steepness: Sigmoid steepness.
	        offset: Sigmoid offset.

	    Returns:
	        Tensor with a trailing singleton dimension appended (``(B, T, 1)`` for
	        ``(B, T)`` inputs). This holds on every path, including the "no
	        mixing" shortcuts, so the result always broadcasts against a
	        ``(B, T, V)`` distribution.
	    """
	    if neg_entropy is None:
	        # Nothing to derive lambda from: return "no mixing", shaped like the mask.
	        return torch.zeros(mask_positions.shape, device=mask_positions.device).unsqueeze(-1)

	    if scale == 0.0:
	        # Same trailing axis as the regular path below.
	        return torch.zeros_like(neg_entropy).unsqueeze(-1)

	    # Squash negative entropy into [0, scale] with a sigmoid.
	    lambda_tensor = scale * torch.sigmoid(steepness * (neg_entropy - offset))

	    # Mixing applies only at mask positions.
	    lambda_tensor = torch.where(mask_positions, lambda_tensor, torch.zeros_like(lambda_tensor))
	    return lambda_tensor.unsqueeze(-1)  # (B,T,1)

	def get_only_topk_probs(logits, mixinputs_k=3):
	    """Return a full-vocabulary distribution supported on the top-k tokens.

	    For every position (all leading dimensions of ``logits``) the
	    ``mixinputs_k`` largest logits along the last dimension are
	    renormalised with a softmax; every other vocabulary entry is zero.

	    Args:
	        logits: Tensor whose last dimension is the vocabulary.
	        mixinputs_k: Number of tokens to keep per position (>= 1).

	    Returns:
	        Tensor with the shape and dtype of ``logits``.

	    Raises:
	        ValueError: If ``mixinputs_k`` is smaller than 1.
	    """
	    if mixinputs_k < 1:
	        raise ValueError(f"mixinputs_k must be >= 1, got {mixinputs_k}")

	    topk_logits, topk_indices = torch.topk(logits, k=mixinputs_k, dim=-1)  # (..., k)

	    # Softmax over the k survivors sums to 1 by construction. No runtime
	    # assertions are made: counting strictly positive entries fails
	    # spuriously when a probability underflows to exactly 0 (peaked
	    # distributions, low precision), and the check costs a host sync.
	    topk_probs = torch.softmax(topk_logits, dim=-1)  # (..., k)

	    probs_full = torch.zeros_like(logits)  # (..., V)
	    probs_full.scatter_(-1, topk_indices, topk_probs)  # fill the top-k slots
	    return probs_full

	def get_time_dependence(
	    max_gen_length: int,
	    num_mask_token: int,
	    scale: float,
	    schedule: str,
	    sm_to_hm: bool = True,
	    threshold: float = 0.5,
	) -> float:
	    """Return the mixing scale adjusted for decoding progress.

	    Progress is measured as ``t = num_mask_token / max_gen_length``, the
	    fraction of positions still masked. ``t`` is taken as 1.0 when
	    ``max_gen_length`` is 0.

	    Args:
	        max_gen_length: Total generation length used to normalise progress.
	        num_mask_token: Number of mask tokens currently remaining.
	        scale: Base mixing strength.
	        schedule: "none" (constant), "linear" or "stepwise".
	        sm_to_hm: If true, the strength follows ``t``; otherwise ``1 - t``
	            (linear) or the inverted threshold test (stepwise).
	        threshold: Cut-off on ``t`` for the stepwise schedule.

	    Returns:
	        The scheduled scale. The stepwise "off" state is ``0.0``, so the
	        result is always a float when ``scale`` is a float.

	    Raises:
	        ValueError: If ``schedule`` is not one of the known names.
	    """
	    t = num_mask_token / max_gen_length if max_gen_length else 1.0

	    if schedule == "none":
	        return scale

	    if schedule == "linear":
	        return scale * (t if sm_to_hm else 1 - t)

	    if schedule == "stepwise":
	        cond = t > threshold if sm_to_hm else t < threshold
	        # 0.0 rather than 0 keeps the return type in line with the annotation.
	        return scale if cond else 0.0

	    raise ValueError(f"Unknown schedule: {schedule}")