# inference.py: example script for running inference with the Sentinel model.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


def load_model(model_name="your-username/sentinel"):
    """
    Load the Sentinel model and tokenizer, and wrap them in a
    text-generation pipeline.
    """
    print(f"Loading {model_name}...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",  # places weights on GPU(s) if available; requires `accelerate`
        trust_remote_code=True,
    )
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
    return generator
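

# A minimal alternative loader (a sketch, not part of the script above):
# loads the weights in half precision to roughly halve GPU memory use.
# Assumes a CUDA-capable GPU and a checkpoint that tolerates float16;
# the name `load_model_fp16` is hypothetical.
def load_model_fp16(model_name="your-username/sentinel"):
    import torch

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,  # half-precision weights
        device_map="auto",
        trust_remote_code=True,
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)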


def code_with_sentinel(prompt, generator, max_new_tokens=200):
    """
    Generate code from a natural-language prompt and return only the
    newly generated text, with the prompt stripped off.
    """
    print(f"\nPrompt: {prompt}\n")
    output = generator(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,  # sample instead of greedy decoding
        top_p=0.9,       # nucleus sampling
        temperature=0.7,
        eos_token_id=generator.tokenizer.eos_token_id,
    )
    result = output[0]["generated_text"]
    # The pipeline returns prompt + completion; keep only the completion.
    return result[len(prompt):].strip()
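

# Equivalent without manual slicing: the text-generation pipeline accepts
# return_full_text=False, which makes it return only the completion.
# A sketch of the same call using that parameter:
#
#     output = generator(prompt, max_new_tokens=max_new_tokens,
#                        do_sample=True, top_p=0.9, temperature=0.7,
#                        return_full_text=False)
#     return output[0]["generated_text"].strip()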


if __name__ == "__main__":
    # Example usage
    generator = load_model("your-username/sentinel")
    prompt = "Write a Python function that checks if a number is prime."
    code = code_with_sentinel(prompt, generator)
    print("Generated Code:\n")
    print(code)
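
    # Batched usage (a sketch): the pipeline also accepts a list of prompts
    # and returns one result list per prompt.
    #
    #     prompts = [
    #         "Write a Python function to reverse a string.",
    #         "Write a Python function to compute a factorial.",
    #     ]
    #     for p, out in zip(prompts, generator(prompts, max_new_tokens=100)):
    #         print(out[0]["generated_text"][len(p):].strip())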