Spaces:

kaelpsu
/

windows-malware-analyzer

Sleeping

App Files Files Community

windows-malware-analyzer / app.py

kaelpsu

updating app.py

7d62042 6 months ago

raw

history blame contribute delete

5.2 kB

	import gradio as gr
	import torch
	from torchvision import transforms
	from transformers import ViTForImageClassification
	from PIL import Image
	import torch.nn.functional as F
	import numpy as np
	import math
	import io

	# --- CONFIGURATION ---
	# Directory holding the fine-tuned ViT checkpoint, relative to the app.
	MODEL_PATH = "./malware_vit_model"

	# 0. Select the inference device: CUDA when a GPU is available, CPU otherwise.
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")
	print(f"Usando dispositivo para inferência: {device}")

	# 1. Load the model.
	# Any failure is reported and placeholder labels are installed so the module
	# still imports; note that `model` stays unbound in that case.
	print("Carregando modelo...")
	try:
	    model = ViTForImageClassification.from_pretrained(MODEL_PATH)
	    # Keep the weights on the same device the inputs will be moved to,
	    # and switch to evaluation mode for inference.
	    model.to(device).eval()

	    # Mapping from class index to label name, taken from the model config.
	    labels = model.config.id2label
	    print("Modelo carregado com sucesso!")
	except Exception as load_error:
	    print(f"Erro ao carregar o modelo: {load_error}")
	    labels = dict(enumerate(("Erro", "Modelo não encontrado")))

	# 2. Preprocessing applied to each rendered image before it reaches the model:
	#    resize to 224x224, replicate the grayscale channel to 3 channels,
	#    convert to a tensor, then normalize every channel with mean 0.5 / std 0.5.
	transform_pipeline = transforms.Compose(
	    [
	        transforms.Resize(size=(224, 224)),
	        transforms.Grayscale(3),
	        transforms.ToTensor(),
	        transforms.Normalize([0.5] * 3, [0.5] * 3),
	    ]
	)

	# 3. Helper: convert a binary into an image
	def binary_to_image(binary_data):
	    """Render raw file bytes as an 8-bit grayscale image.

	    Uses the width heuristic of Nataraj et al.: the binary is read as a 1D
	    vector of 8-bit unsigned integers and organized into a 2D matrix whose
	    width is fixed by the file size and whose height varies with it. When
	    the byte count is not a multiple of the width, the last row is padded
	    with zeros (black).

	    Args:
	        binary_data: Bytes-like object holding the file contents.

	    Returns:
	        The image built from the byte matrix, or None when the input is
	        empty or the conversion fails (the failure is printed).
	    """
	    # (exclusive upper bound in bytes, image width in pixels), ascending.
	    # Empirical table commonly used in the academic literature.
	    width_table = (
	        (10 * 1024, 32),      # < 10 kB
	        (30 * 1024, 64),      # 10 kB - 30 kB
	        (60 * 1024, 128),     # 30 kB - 60 kB
	        (100 * 1024, 256),    # 60 kB - 100 kB
	        (200 * 1024, 384),    # 100 kB - 200 kB
	        (500 * 1024, 512),    # 200 kB - 500 kB
	        (1000 * 1024, 768),   # 500 kB - 1 MB
	    )
	    default_width = 1024      # anything larger

	    try:
	        data = np.frombuffer(binary_data, dtype=np.uint8)
	        file_len = data.size

	        # Nothing to draw for an empty file.
	        if file_len == 0:
	            return None

	        width = next(
	            (w for limit, w in width_table if file_len < limit),
	            default_width,
	        )

	        # Ceiling division in integer arithmetic (no float round-trip).
	        height = -(-file_len // width)

	        # The 1D array needs exactly width * height elements to be reshaped;
	        # fill the tail of the last row with zeros.
	        pad_len = width * height - file_len
	        if pad_len > 0:
	            data = np.pad(data, (0, pad_len), "constant", constant_values=0)

	        # A 2D uint8 array maps to 8-bit grayscale ("L") automatically, so
	        # no explicit mode argument is needed.
	        return Image.fromarray(data.reshape((height, width)))

	    except Exception as e:
	        # Only bytes are available here (no file path), so the message must
	        # not reference one; report the failure and signal it with None.
	        print(f"Erro ao converter o binário: {e}")
	        return None

	# 4. Main prediction function
	def predict(file_bytes):
	    """Classify an uploaded binary and return its scores plus a preview.

	    Args:
	        file_bytes: Raw contents of the uploaded file, or None when nothing
	            was uploaded.

	    Returns:
	        A ``(confidences, image)`` pair. ``confidences`` maps each label
	        name to its softmax probability and ``image`` is the rendered
	        binary resized to 224x224. Returns ``(None, None)`` when there is
	        no upload, and a single-entry error dict with ``None`` as the image
	        when rendering or inference fails.
	    """
	    if file_bytes is None:
	        return None, None

	    # Render the binary as a grayscale image.
	    rendered = binary_to_image(file_bytes)
	    if rendered is None:
	        return {"Erro": 0.0}, None

	    try:
	        # Add the batch dimension and move the input to the model's device.
	        batch = transform_pipeline(rendered).unsqueeze(0).to(device)

	        with torch.no_grad():
	            logits = model(batch).logits
	            probabilities = F.softmax(logits, dim=1)[0]

	        confidences = {}
	        for class_idx in range(len(labels)):
	            confidences[labels[class_idx]] = float(probabilities[class_idx])

	        preview = rendered.resize((224, 224))
	    except Exception as error:
	        return {f"Erro no processamento: {str(error)}": 0.0}, None

	    return confidences, preview

	# --- GRADIO INTERFACE ---

	title = "🦠 Classificador de Malware (Binário -> ViT)"
	description = "Faça upload de qualquer arquivo (.exe, .bin, .dll). O sistema converte para imagem e classifica a família."

	# Input: the uploaded file, requested in "binary" mode for predict().
	_file_input = gr.File(type="binary", label="Upload do Arquivo")

	# Outputs, in the order predict() returns them: label scores, then the image.
	_family_output = gr.Label(num_top_classes=5, label="Famílias Prováveis")
	_preview_output = gr.Image(type="pil", label="Representação Visual")

	interface = gr.Interface(
	    fn=predict,
	    inputs=_file_input,
	    outputs=[_family_output, _preview_output],
	    title=title,
	    description=description,
	)

	# Start the web UI only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()