caffeinatedcherrychic
/

mistral-based-NIDS-old

Model card Files Files and versions

Metrics Training metrics Community

mistral-based-NIDS-old / finetune-test.py

caffeinatedcherrychic's picture

caffeinatedcherrychic

Upload folder using huggingface_hub

9955c09 verified almost 2 years ago

history blame contribute delete

2.6 kB

	# This script is used to test the model using a dataset

	# Import the necessary libraries
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from langchain.memory import ConversationBufferWindowMemory
	from peft import PeftModel
	import torch

	import json
	import sys

	# Check if the correct number of arguments are provided
	if len(sys.argv) != 2:
	print("Usage: python finetune.py <jsonl_file>")
	sys.exit(1)

	# Get the file path from the command-line argument
	jsonl_file_path = sys.argv[1]

	# Load the model and tokenizer
	base_model = "mistralai/Mistral-7B-Instruct-v0.2"
	tokenizer = AutoTokenizer.from_pretrained(base_model)
	tokenizer.add_special_tokens({"pad_token": "[PAD]"})
	base_model = AutoModelForCausalLM.from_pretrained(base_model)


	ft_model = PeftModel.from_pretrained(base_model, "./qlora-out")
	# ft_model = ft_model.merge_and_unload()
	ft_model.eval()

	# Set the device
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	ft_model.to(device)

	# Read the JSONL file
	with open(jsonl_file_path, "r") as f:
	tp, tn, fp, fn = 0, 0, 0, 0
	for line in f:
	data = json.loads(line)
	user_in = data["input"]
	user_input = f"[INST] ###instruction: Check if the given traffic flow is normal or of an attacker or a victim\n###input: {user_in}\n#output: [/INST]"
	encodings = tokenizer(user_input, return_tensors="pt", padding=True).to(device)
	input_ids = encodings["input_ids"]
	attention_mask = encodings["attention_mask"]

	output_ids = ft_model.generate(input_ids, attention_mask = attention_mask, max_new_tokens=1000, num_return_sequences=1, do_sample=True, temperature=0.1, top_p=0.9)

	generated_ids = output_ids[0, input_ids.shape[-1]:]

	# Decode the output
	response = tokenizer.decode(generated_ids, skip_special_tokens=True).lower()

	# calculate true positive, true negative, false positive, false negative
	if "normal" not in response and data["output"] == response:
	tp += 1
	elif "normal" in response and data["output"] == response:
	tn += 1
	elif "normal" in response and data["output"] != response:
	fp += 1
	elif "normal" not in response and data["output"] != response:
	fn += 1
	else:
	print(f"Error: {response}, {data[output]}")
	print(f"User input: {user_in}")
	print(f"Generated response: {response}")
	print(f"Expected response: {data[output]}")
	print()

	print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")