"""
Vext-labs-7B-v1.1 — Inference Script

Run autonomous penetration testing analysis with a single command.

Usage:
    python run.py --prompt "Analyze this nmap scan: ..."
    python run.py --prompt-file scan_output.txt
    python run.py --interactive
"""

import argparse
import sys

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
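
# Note: device_map="auto" dispatches the weights across available GPUs/CPU
# and requires the `accelerate` package to be installed alongside
# transformers and torch.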

MODEL_ID = "Vext-Labs-Inc/Vext-labs-7B-v1.1"
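
# Rough footprint: a 7B-parameter model in bfloat16 holds ~14 GB of weights
# (7e9 params x 2 bytes) before activation and KV-cache overhead, so a 16 GB
# GPU is a practical minimum; pass dtype=torch.float16 on GPUs without
# bfloat16 support.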


def load_model(device_map="auto", dtype=torch.bfloat16):
    """Load the model and tokenizer."""
    print(f"Loading {MODEL_ID}...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=dtype,
        device_map=device_map,
    )
    print("Model loaded successfully.")
    return tokenizer, model
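

# The script feeds raw text straight to the model. If the checkpoint ships a
# chat template (most instruction-tuned models do), rendering prompts through
# it usually matches the training format better than bare completion. A
# minimal sketch, assuming the tokenizer defines a chat template;
# build_chat_prompt is a hypothetical helper, not part of the original script:
def build_chat_prompt(tokenizer, user_text):
    """Render user_text through the tokenizer's chat template, if present."""
    if getattr(tokenizer, "chat_template", None):
        return tokenizer.apply_chat_template(
            [{"role": "user", "content": user_text}],
            tokenize=False,
            add_generation_prompt=True,
        )
    return user_text  # fall back to plain completion-style prompting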


def generate(tokenizer, model, prompt, max_new_tokens=512, temperature=0.7):
    """Generate a response from the model."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # temperature == 0 selects greedy decoding. Only pass sampling
    # parameters when sampling is enabled, since transformers complains
    # when temperature/top_p accompany do_sample=False.
    do_sample = temperature > 0
    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "repetition_penalty": 1.1,
    }
    if do_sample:
        gen_kwargs["temperature"] = temperature
        gen_kwargs["top_p"] = 0.9

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    # Drop the prompt tokens so only the newly generated text is returned.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
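

# Example: deterministic, reproducible output (e.g., for report snippets)
# comes from temperature=0, which selects greedy decoding above:
#
#     tokenizer, model = load_model()
#     print(generate(tokenizer, model, "Summarize these findings: ...",
#                    temperature=0))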


def interactive_mode(tokenizer, model, args):
    """Run an interactive session."""
    print("\n" + "=" * 60)
    print(" Vext-labs-7B-v1.1 — Interactive Mode")
    print(" Type 'quit', 'exit', or 'q' to stop.")
    print("=" * 60 + "\n")

    while True:
        try:
            prompt = input(">>> ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nExiting.")
            break

        if prompt.lower() in ("quit", "exit", "q"):
            break
        if not prompt:
            continue

        response = generate(
            tokenizer, model, prompt,
            max_new_tokens=args.max_tokens,
            temperature=args.temperature,
        )
        print(f"\n{response}\n")


def main():
    parser = argparse.ArgumentParser(
        description="Run inference with Vext-labs-7B-v1.1"
    )
    parser.add_argument(
        "--prompt", type=str, default=None,
        help="Text prompt to send to the model"
    )
    parser.add_argument(
        "--prompt-file", type=str, default=None,
        help="Path to a file containing the prompt (e.g., scan output)"
    )
    parser.add_argument(
        "--interactive", action="store_true",
        help="Launch interactive chat mode"
    )
    parser.add_argument(
        "--max-tokens", type=int, default=512,
        help="Maximum new tokens to generate (default: 512)"
    )
    parser.add_argument(
        "--temperature", type=float, default=0.7,
        help="Sampling temperature (default: 0.7; use 0 for greedy decoding)"
    )
    parser.add_argument(
        "--device-map", type=str, default="auto",
        help="Device map for model loading (default: auto)"
    )
    args = parser.parse_args()

    if not args.prompt and not args.prompt_file and not args.interactive:
        parser.print_help()
        print("\nError: provide --prompt, --prompt-file, or --interactive")
        sys.exit(1)

    tokenizer, model = load_model(device_map=args.device_map)

    if args.interactive:
        interactive_mode(tokenizer, model, args)
        return

    if args.prompt_file:
        try:
            with open(args.prompt_file, "r") as f:
                prompt = f.read().strip()
        except OSError as exc:
            sys.exit(f"Error reading {args.prompt_file}: {exc}")
    else:
        prompt = args.prompt

    response = generate(
        tokenizer, model, prompt,
        max_new_tokens=args.max_tokens,
        temperature=args.temperature,
    )
    print(response)


if __name__ == "__main__":
    main()