import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the LLaMA model and tokenizer
model_name = "meta-llama/Llama-3.2-3B-Instruct"  # Gated repo: requires approved access on Hugging Face

# Authenticate if needed
# from huggingface_hub import login
# login(token="your_huggingface_token")

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # Use float16 for better performance on GPUs
    device_map="auto",          # Automatically place the model on available devices
)


# Function to generate text
def generate_text(prompt, max_new_tokens=150, temperature=0.7, top_p=0.95):
    # Tokenize the prompt and move the tensors to the same device as the model
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,                       # pass input_ids and attention_mask
        max_new_tokens=max_new_tokens,  # limit only the newly generated tokens
        do_sample=True,                 # required for temperature/top_p to take effect
        temperature=temperature,
        top_p=top_p,
        no_repeat_ngram_size=2,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,  # LLaMA has no dedicated pad token
    )
    # Strip the prompt tokens so only the generated continuation is returned
    generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)


# Gradio interface
def gradio_interface(prompt, max_new_tokens, temperature, top_p):
    return generate_text(prompt, max_new_tokens, temperature, top_p)


iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(lines=5, label="Prompt"),
        gr.Slider(50, 500, value=150, step=10, label="Max New Tokens"),
        gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
    outputs="text",
    title="LLaMA 3.2 Text Generator",
    description="Generate text using the LLaMA 3.2 model.",
)

# Launch the Gradio interface
iface.launch(share=True)
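
# Usage note (a sketch, assuming the file is run as a standalone script, e.g. `python app.py`):
# generate_text can also be called directly, without the Gradio UI, as a quick
# smoke test before launching the interface:
#
#   print(generate_text("Write a short poem about the sea.", max_new_tokens=100))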