# Source: Hugging Face upload by ankitkushwaha90 ("Upload 2 files", commit df03681, verified)
# Revised script to use the fine-tuned T5 model for generating command descriptions
# Includes debugging output and adjusted generation parameters
# Model directory: C:\app\dataset\new_cmd_model
# Prerequisites: Ensure transformers, torch, and sentencepiece are installed
import os
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch
# Define model and tokenizer path
model_path = r"C:\app\dataset\new_cmd_model"

# Check that the model path is a directory before attempting to load
# (from_pretrained expects a directory of model/tokenizer files).
if not os.path.isdir(model_path):
    raise FileNotFoundError(f"Model directory '{model_path}' not found. Please verify the path.")

# Load the fine-tuned model and tokenizer
try:
    model = T5ForConditionalGeneration.from_pretrained(model_path)
    tokenizer = T5Tokenizer.from_pretrained(model_path)
    print("Model and tokenizer loaded successfully.")
except Exception as e:
    # Chain the original exception (`from e`) so the root cause stays in the
    # traceback; RuntimeError is a concrete subclass, so any caller catching
    # Exception still works.
    raise RuntimeError(f"Error loading model or tokenizer: {str(e)}") from e
# Function to generate a description for a given command and source
def generate_command_description(command_name, source, max_length=150):
    """Generate a natural-language description for a command via the fine-tuned T5 model.

    Args:
        command_name: Name of the command (e.g. "ls", "dir").
        source: Platform label used during training (e.g. "linux", "cmd",
            "macos", "vbscript").
        max_length: Maximum token length of the generated description.

    Returns:
        The generated description string, or a human-readable warning/error
        message string if generation produces nothing or fails.
    """
    # Format the input prompt exactly as used during fine-tuning
    prompt = f"Describe the command: {command_name} in {source}"
    print(f"Input prompt: {prompt}")  # Debug: show the prompt being used

    # Tokenize the input (truncated to the training-time context length)
    inputs = tokenizer(prompt, return_tensors="pt", max_length=128, truncation=True)

    # Move model and inputs to GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    inputs = {key: value.to(device) for key, value in inputs.items()}
    print(f"Using device: {device}")  # Debug: show device being used

    # Generate output with adjusted parameters
    try:
        # Inference only: no_grad skips gradient bookkeeping (saves memory,
        # does not change the generated tokens).
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                # Forward the attention mask so generate() does not have to
                # guess it from pad tokens (silences HF warnings and is
                # correct if padding is ever introduced).
                attention_mask=inputs.get("attention_mask"),
                max_length=max_length,    # increased for longer descriptions
                num_beams=5,              # more beams for better quality
                length_penalty=1.2,       # slightly favor longer outputs
                early_stopping=True,
                no_repeat_ngram_size=2,   # prevent repetitive phrases
                do_sample=False,          # deterministic beam search, not sampling
            )
        # Decode and return the generated text
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if not generated_text.strip():
            return "Warning: Empty description generated. Check model training or prompt format."
        return generated_text
    except Exception as e:
        # Return the message instead of raising so the demo/interactive loops
        # keep running even when a single generation fails.
        return f"Error generating description: {str(e)}"
# Demo: run the generator over a small, fixed set of (command, source) pairs.
test_commands = [
    ("ls", "linux"),
    ("dir", "cmd"),
    ("chmod", "macos"),
    ("MsgBox", "vbscript"),
]

print("\nGenerated Command Descriptions:")
print("-" * 50)
for cmd, src in test_commands:
    # Generate first so the function's debug prints appear before the header.
    desc = generate_command_description(cmd, src)
    print(f"Command: {cmd} ({src})")
    print(f"Description: {desc}")
    print("-" * 50)
# Interactive mode: Allow user to input a command and source
print("\nInteractive Mode: Enter a command and source to get its description.")
print("Valid sources: cmd, linux, macos, vbscript")
print("Type 'exit' to quit.\n")

# Hoisted out of the loop: the set of accepted source labels never changes.
valid_sources = ["cmd", "linux", "macos", "vbscript"]

while True:
    command_name = input("Enter command name (or 'exit' to quit): ").strip()
    if command_name.lower() == "exit":
        break
    source = input("Enter source (e.g., cmd, linux, macos, vbscript): ").strip().lower()
    # Validate source before spending time on generation
    if source not in valid_sources:
        print(f"Invalid source. Please use one of: {', '.join(valid_sources)}")
        continue
    description = generate_command_description(command_name, source)
    print(f"\nCommand: {command_name} ({source})")
    print(f"Description: {description}")
    print("-" * 50)

print("Exiting interactive mode.")