#!/usr/bin/env python3
"""
Vext-labs-7B-v1.1 — Inference Script
Run autonomous penetration testing analysis with a single command.
Usage:
python run.py --prompt "Analyze this nmap scan: ..."
python run.py --prompt-file scan_output.txt
python run.py --interactive
"""
import argparse
import sys
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
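
# Note: device_map="auto" dispatches weights via the `accelerate` package,
# so accelerate must be installed alongside torch and transformers.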
MODEL_ID = "Vext-Labs-Inc/Vext-labs-7B-v1.1"
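# This repo was uploaded as a LoRA adapter. Recent transformers releases can
# load a PEFT adapter repo directly through from_pretrained when `peft` is
# installed; if yours does not, attach the adapter to its base model instead.
# A sketch (the base-model id is an assumption, not stated in this repo):
#   from peft import PeftModel
#   base = AutoModelForCausalLM.from_pretrained("<base-model-id>")
#   model = PeftModel.from_pretrained(base, MODEL_ID)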


def load_model(device_map="auto", dtype=torch.bfloat16):
    """Load the tokenizer and model."""
    print(f"Loading {MODEL_ID}...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=dtype,
        device_map=device_map,
    )
    print("Model loaded successfully.")
    return tokenizer, model
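
# Optional: on GPUs with limited memory, 4-bit quantization is a common
# alternative (a sketch, assuming `bitsandbytes` is installed; untested here):
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       MODEL_ID,
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#       device_map="auto",
#   )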


def generate(tokenizer, model, prompt, max_new_tokens=512, temperature=0.7):
    """Generate a response from the model."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "repetition_penalty": 1.1,
        # Fall back to EOS for models that define no pad token.
        "pad_token_id": (
            tokenizer.pad_token_id
            if tokenizer.pad_token_id is not None
            else tokenizer.eos_token_id
        ),
    }
    if temperature > 0:
        # Sampling parameters only apply when do_sample=True;
        # temperature 0 falls through to greedy decoding.
        gen_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)
    # Decode only the newly generated tokens (skip the prompt).
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
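
# Note: prompts are passed to the model as raw text. If the tokenizer defines
# a chat template, wrapping the prompt with it may improve instruction
# following (a sketch, assuming such a template exists):
#   messages = [{"role": "user", "content": prompt}]
#   prompt = tokenizer.apply_chat_template(
#       messages, tokenize=False, add_generation_prompt=True
#   )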


def interactive_mode(tokenizer, model, args):
    """Run an interactive prompt/response session."""
    print("\n" + "=" * 60)
    print(" Vext-labs-7B-v1.1 — Interactive Mode")
    print(" Type 'quit' or 'exit' to stop.")
    print("=" * 60 + "\n")
    while True:
        try:
            prompt = input(">>> ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nExiting.")
            break
        if prompt.lower() in ("quit", "exit", "q"):
            break
        if not prompt:
            continue
        response = generate(
            tokenizer, model, prompt,
            max_new_tokens=args.max_tokens,
            temperature=args.temperature,
        )
        print(f"\n{response}\n")


def main():
    parser = argparse.ArgumentParser(
        description="Run inference with Vext-labs-7B-v1.1"
    )
    parser.add_argument(
        "--prompt", type=str, default=None,
        help="Text prompt to send to the model"
    )
    parser.add_argument(
        "--prompt-file", type=str, default=None,
        help="Path to a file containing the prompt (e.g., scan output)"
    )
    parser.add_argument(
        "--interactive", action="store_true",
        help="Launch interactive chat mode"
    )
    parser.add_argument(
        "--max-tokens", type=int, default=512,
        help="Maximum new tokens to generate (default: 512)"
    )
    parser.add_argument(
        "--temperature", type=float, default=0.7,
        help="Sampling temperature (default: 0.7; use 0 for greedy decoding)"
    )
    parser.add_argument(
        "--device-map", type=str, default="auto",
        help="Device map for model loading (default: auto)"
    )
    args = parser.parse_args()

    if not (args.prompt or args.prompt_file or args.interactive):
        parser.print_help()
        print("\nError: provide --prompt, --prompt-file, or --interactive",
              file=sys.stderr)
        sys.exit(1)

    tokenizer, model = load_model(device_map=args.device_map)

    if args.interactive:
        interactive_mode(tokenizer, model, args)
        return

    # Read the prompt from a file if given, otherwise use --prompt directly.
    if args.prompt_file:
        with open(args.prompt_file, "r", encoding="utf-8") as f:
            prompt = f.read().strip()
    else:
        prompt = args.prompt

    response = generate(
        tokenizer, model, prompt,
        max_new_tokens=args.max_tokens,
        temperature=args.temperature,
    )
    print(response)


if __name__ == "__main__":
    main()