"""Generate command descriptions with a fine-tuned T5 model.

Loads the model from ``model_path``, prints descriptions for a few sample
commands, then enters an interactive prompt loop.

Model directory: C:\\app\\dataset\\new_cmd_model
Prerequisites: transformers, torch, and sentencepiece must be installed.
"""

import os

import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Define model and tokenizer path
model_path = r"C:\app\dataset\new_cmd_model"

# Check if model directory exists before attempting a (slow) load.
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model directory '{model_path}' not found. Please verify the path.")

# Load the fine-tuned model and tokenizer.
try:
    model = T5ForConditionalGeneration.from_pretrained(model_path)
    tokenizer = T5Tokenizer.from_pretrained(model_path)
    print("Model and tokenizer loaded successfully.")
except Exception as e:
    # Chain the original exception so the root cause stays visible;
    # RuntimeError is narrower than a bare Exception.
    raise RuntimeError(f"Error loading model or tokenizer: {str(e)}") from e

# Select the device ONCE at startup. The original re-ran device selection and
# model.to(device) on every generation call, which is redundant work.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # inference only — disables dropout etc.


def generate_command_description(command_name, source, max_length=150):
    """Generate a description for *command_name* on platform *source*.

    Args:
        command_name: The command to describe (e.g. ``"ls"``).
        source: Platform/context of the command (e.g. ``"linux"``, ``"cmd"``,
            ``"macos"``, ``"vbscript"``) — assumed to match the labels used
            during fine-tuning.
        max_length: Maximum token length of the generated description.

    Returns:
        The generated description, or a human-readable warning/error string;
        generation failures are reported in the return value, never raised.
    """
    # Format the input prompt as used during training.
    prompt = f"Describe the command: {command_name} in {source}"
    print(f"Input prompt: {prompt}")  # Debug: Show the prompt being used

    # Tokenize the input and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt", max_length=128, truncation=True)
    inputs = {key: value.to(device) for key, value in inputs.items()}
    print(f"Using device: {device}")  # Debug: Show device being used

    try:
        # no_grad: pure inference, skip autograd bookkeeping.
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                # Pass the attention mask explicitly so padding is handled
                # correctly (also silences the transformers pad-token warning).
                attention_mask=inputs.get("attention_mask"),
                max_length=max_length,    # Increased for longer descriptions
                num_beams=5,              # Increased beams for better quality
                length_penalty=1.2,       # Slightly favor longer outputs
                early_stopping=True,
                no_repeat_ngram_size=2,   # Prevent repetitive phrases
                do_sample=False,          # Use beam search, not sampling
            )
        # Decode and return the generated text.
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if not generated_text.strip():
            return "Warning: Empty description generated. Check model training or prompt format."
        return generated_text
    except Exception as e:
        return f"Error generating description: {str(e)}"


def main():
    """Run the demo: sample descriptions, then an interactive prompt loop."""
    # Example usage: Generate descriptions for a few commands.
    test_commands = [
        ("ls", "linux"),
        ("dir", "cmd"),
        ("chmod", "macos"),
        ("MsgBox", "vbscript"),
    ]

    print("\nGenerated Command Descriptions:")
    print("-" * 50)
    for command_name, source in test_commands:
        description = generate_command_description(command_name, source)
        print(f"Command: {command_name} ({source})")
        print(f"Description: {description}")
        print("-" * 50)

    # Interactive mode: allow the user to query arbitrary commands.
    print("\nInteractive Mode: Enter a command and source to get its description.")
    print("Valid sources: cmd, linux, macos, vbscript")
    print("Type 'exit' to quit.\n")

    valid_sources = ["cmd", "linux", "macos", "vbscript"]
    while True:
        command_name = input("Enter command name (or 'exit' to quit): ").strip()
        if command_name.lower() == "exit":
            break
        source = input("Enter source (e.g., cmd, linux, macos, vbscript): ").strip().lower()
        # Validate source before spending time on generation.
        if source not in valid_sources:
            print(f"Invalid source. Please use one of: {', '.join(valid_sources)}")
            continue
        description = generate_command_description(command_name, source)
        print(f"\nCommand: {command_name} ({source})")
        print(f"Description: {description}")
        print("-" * 50)

    print("Exiting interactive mode.")


# Guard the script body so importing this module does not trigger the demo
# and the blocking interactive input() loop.
if __name__ == "__main__":
    main()