Sweep Next-Edit 1.5B (ONNX)
A 1.5B-parameter model for next-edit autocomplete, converted to ONNX format (an fp32, 🤗 Transformers-compatible version is also included).
Model Description
Sweep Next-Edit predicts your next code edit before you make it. It runs locally on your laptop in under 500ms (with speculative decoding) and outperforms models over 4x its size on next-edit benchmarks. More details are in the blog post linked below.
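The exact speculative-decoding setup behind the sub-500ms figure isn't reproduced here, but prompt-lookup decoding (a draft-model-free form of speculative decoding built into 🤗 Transformers) is a natural fit for next-edit prediction, since the generated file copies long spans of the prompt. A minimal sketch, loading the model as in the usage example below; the prompt_lookup_num_tokens value is an illustrative choice:

from transformers import AutoTokenizer, Qwen2ForCausalLM

model_id = "Xenova/sweep-next-edit-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = Qwen2ForCausalLM.from_pretrained(model_id, device_map="auto")

prompt = "<|file_sep|>current/greet.py\nprint('hi')\n<|file_sep|>updated/greet.py"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Prompt-lookup decoding drafts candidate tokens by matching n-grams that
# already appear in the prompt, then verifies them in a single forward pass.
outputs = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=False,
    prompt_lookup_num_tokens=10,  # assumption: tune for your workload
)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))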
Usage
HF Transformers
Example of building a prompt and generating a next-edit prediction with 🤗 Transformers:
import time
import difflib
from transformers import AutoTokenizer, Qwen2ForCausalLM
import torch
# Load model and tokenizer
model_id = "Xenova/sweep-next-edit-1.5B"
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_id)
print("Loading model...")
model = Qwen2ForCausalLM.from_pretrained(model_id, device_map="auto")
def build_prompt(
context_files: dict[str, str],
recent_diffs: list[dict[str, str]],
file_path: str,
original_content: str,
current_content: str,
) -> str:
"""
Build a prompt following Sweep Next Edit's training format.
Format:
<|file_sep|>{file_path_1}
{file_content_1}
<|file_sep|>{file_path_2}
{file_content_2}
<|file_sep|>{changed_file_1}.diff
original:
{before_changes_of_diff}
updated:
{after_changes_of_diff}
<|file_sep|>original/{file_path}
{contents_prior_to_most_recent_change}
<|file_sep|>current/{file_path}
{current_state_of_contents}
<|file_sep|>updated/{file_path}
{updated_state_of_contents}
Args:
context_files: Dict mapping file paths to their contents (related files for context)
recent_diffs: List of dicts with 'file_path', 'original', and 'updated' keys
file_path: Path of the file being edited
original_content: Contents prior to most recent change
current_content: Current state of the file being edited
Returns:
Formatted prompt string
"""
prompt_parts = []
# Add context files
for path, content in context_files.items():
prompt_parts.append(f"<|file_sep|>{path}")
prompt_parts.append(content)
# Add recent diffs
for diff in recent_diffs:
prompt_parts.append(f"<|file_sep|>{diff['file_path']}.diff")
prompt_parts.append("original:")
prompt_parts.append(diff['original'])
prompt_parts.append("updated:")
prompt_parts.append(diff['updated'])
# Add original and current states
prompt_parts.append(f"<|file_sep|>original/{file_path}")
prompt_parts.append(original_content)
prompt_parts.append(f"<|file_sep|>current/{file_path}")
prompt_parts.append(current_content)
prompt_parts.append(f"<|file_sep|>updated/{file_path}")
return "\n".join(prompt_parts)
def generate(prompt: str, max_new_tokens: int = 512) -> str:
"""Generate completion using the Sweep Next Edit model."""
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Get the stop token ids
stop_token_ids = [
tokenizer.convert_tokens_to_ids("<|file_sep|>"),
tokenizer.eos_token_id,
]
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=max_new_tokens,
do_sample=False, # Use greedy decoding for deterministic output
pad_token_id=tokenizer.pad_token_id,
eos_token_id=stop_token_ids,
)
# Decode only the generated tokens (exclude the prompt)
generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
return generated_text
if __name__ == "__main__":
# Simple example: User is writing a greeting function
# The model predicts what they'll write next based on the pattern
file_path = "greet.py"
# Context: Other files in the codebase
context_files = {
"utils.py": """def get_time_of_day():
from datetime import datetime
hour = datetime.now().hour
if hour < 12:
return "morning"
elif hour < 18:
return "afternoon"
else:
return "evening"
""",
}
# Recent changes: User just added a personalized greeting
recent_diffs = [
{
"file_path": "greet.py",
"original": """def greet():
print("Hello!")""",
"updated": """def greet(name):
print(f"Hello, {name}!")""",
}
]
# Before the most recent change
original_content = """def greet(name):
print(f"Hello, {name}!")
greet("Alice")"""
# Current state: User just imported get_time_of_day
current_content = """from utils import get_time_of_day
def greet(name):
print(f"Hello, {name}!")
greet("Alice")"""
prompt = build_prompt(
context_files=context_files,
recent_diffs=recent_diffs,
file_path=file_path,
original_content=original_content,
current_content=current_content,
)
print("\n" + "=" * 80)
print("CURRENT CODE:")
print("=" * 80)
print(current_content)
print("\nGenerating prediction...")
start_time = time.time()
predicted_edit = generate(prompt)
end_time = time.time()
print("\n" + "=" * 80)
print("PREDICTED NEXT EDIT:")
print("=" * 80)
print(predicted_edit)
print("\n" + "=" * 80)
print(f"TIME TAKEN: {end_time - start_time:.2f} seconds")
print("=" * 80)
print("\n" + "=" * 80)
print("DIFF (what changed):")
print("=" * 80)
    # Use splitlines() without keepends plus lineterm="" so that joining with
    # "\n" below does not glue the ---/+++/@@ headers to the content lines.
    diff = difflib.unified_diff(
        current_content.splitlines(),
        predicted_edit.splitlines(),
        fromfile=f"current/{file_path}",
        tofile=f"updated/{file_path}",
        lineterm="",
    )
    print("\n".join(diff))
Model Details
- Parameters: 1.5B
- Context Length: 8192 tokens (see the budget-check sketch after this list)
- Base Model: Qwen2.5-Coder
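Prompts bundle several context files plus recent diffs, so it is easy to overflow the 8192-token window. A minimal pre-flight check, reusing the tokenizer from the usage example above; the 512-token output reserve is an illustrative choice:

MAX_CONTEXT = 8192
RESERVED_FOR_OUTPUT = 512  # assumption: headroom for the generated edit

def fits_context(prompt: str) -> bool:
    # Count prompt tokens and keep headroom for generation.
    n_prompt_tokens = len(tokenizer(prompt)["input_ids"])
    return n_prompt_tokens + RESERVED_FOR_OUTPUT <= MAX_CONTEXT

# If this returns False, drop the oldest context files or diffs and rebuild
# the prompt until it fits.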
Links
- Blog Post - Technical details and benchmarks
- JetBrains Plugin - Sweep AI plugin for JetBrains IDEs
- HN Thread - Discussion of implementations for VS Code, Neovim & Emacs
- Twitter Post - Ask us any other questions
License
Apache 2.0