**EXAMPLE USAGE**

```python
# Install required packages if needed
# !pip install transformers torch unsloth
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
import torch

# Load the electrical engineering model with Unsloth.
# FastLanguageModel.from_pretrained returns the model and tokenizer together
# and handles device placement; max_seq_length here is an assumed value.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "neuralnets/electrical_engg_model",
    max_seq_length = 2048,
    load_in_4bit = True,  # 4-bit quantization to reduce VRAM usage
)

# Apply the chat template to format inputs correctly
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

# Enable faster inference using Unsloth
model = FastLanguageModel.for_inference(model)

# Unsloth places the model on the GPU automatically; keep the device
# name around for the input tensors below
device = "cuda" if torch.cuda.is_available() else "cpu"
# Create an electrical engineering related query
messages = [
    {"role": "user", "content": "Explain the working principle of a three-phase induction motor."},
]

# Format the input using the chat template
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,  # Required for generation
    return_tensors = "pt",
).to(device)
# Set up text streaming for real-time output
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
# Generate response
outputs = model.generate(
    input_ids = inputs,
    streamer = text_streamer,
    max_new_tokens = 512,
    use_cache = True,
    temperature = 0.7,  # Lower values favor precision, higher values creativity
    min_p = 0.05,  # min-p sampling: drop tokens below 5% of the top token's probability
)
# To capture the generated reply as a string, decode only the new tokens
# (outputs[0] also contains the prompt tokens, so slice them off)
full_response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
```
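
For a multi-turn exchange, append the assistant's reply to `messages` and re-apply the chat template before generating again. A minimal sketch continuing the conversation above (the follow-up question is illustrative):

```python
# Continue the conversation: record the assistant reply, then ask a follow-up
messages.append({"role": "assistant", "content": full_response})
messages.append({"role": "user", "content": "How does slip affect the motor's torque?"})

inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,
    return_tensors = "pt",
).to(device)

outputs = model.generate(
    input_ids = inputs,
    max_new_tokens = 512,
    use_cache = True,
    temperature = 0.7,
    min_p = 0.05,
)
reply = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
```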
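
If Unsloth is not available in your environment, the model can likely also be loaded with plain `transformers`, at the cost of Unsloth's inference speedups. This is a sketch under the assumption that the checkpoint follows the standard Hugging Face layout; the `torch_dtype` and `device_map` choices are illustrative (the latter requires the `accelerate` package):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Plain transformers loading (no Unsloth speedups)
tokenizer = AutoTokenizer.from_pretrained("neuralnets/electrical_engg_model")
model = AutoModelForCausalLM.from_pretrained(
    "neuralnets/electrical_engg_model",
    torch_dtype = torch.float16,  # assumption: half precision fits your GPU
    device_map = "auto",          # let accelerate handle device placement
)
```

Generation then proceeds exactly as above, via `tokenizer.apply_chat_template` and `model.generate`.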