import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Gated checkpoint on the Hugging Face Hub; access must be requested and an
# auth token configured (e.g. via `huggingface-cli login`) before downloading.
model_name = "meta-llama/Llama-3.2-3B-Instruct"

# Load the tokenizer and model; device_map="auto" places the weights on a GPU
# when one is available, and float16 halves the memory footprint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
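# Optional alternative (an assumption, not part of the original script): if the
# 3B model does not fit in GPU memory, it can be loaded in 4-bit instead. This
# requires the bitsandbytes package; a minimal sketch:
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name,
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#       device_map="auto",
#   )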

def generate_text(prompt, max_length=150, temperature=0.7, top_p=0.95):
    # Move the tokenized inputs onto the same device as the model weights
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        do_sample=True,  # temperature/top_p are ignored without sampling
        temperature=temperature,
        top_p=top_p,
        no_repeat_ngram_size=2,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,  # Llama defines no pad token
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
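# Note (an assumption beyond the original script): Llama 3.2 "Instruct"
# checkpoints are tuned on a chat format, so wrapping the raw prompt with the
# tokenizer's chat template typically yields better completions. A minimal
# sketch of the same generation step using apply_chat_template:
def generate_chat(prompt, max_new_tokens=150, temperature=0.7, top_p=0.95):
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    outputs = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,  # counts generated tokens only
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the tokens generated after the templated prompt
    return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)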

# Thin wrapper so Gradio can pass the widget values straight through; the
# slider delivers max_length as a number, so cast it to int for generate()
def gradio_interface(prompt, max_length, temperature, top_p):
    return generate_text(prompt, int(max_length), temperature, top_p)

# Wire the generator into a simple web UI: a prompt box plus sliders for the
# main sampling knobs
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(lines=5, label="Prompt"),
        gr.Slider(50, 500, value=150, step=10, label="Max Length"),
        gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
    outputs="text",
    title="LLaMA 3.2 Text Generator",
    description="Generate text using the LLaMA 3.2 model.",
)

# share=True tunnels the app through a temporary public gradio.live URL
iface.launch(share=True)
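# Assumption beyond the original: for local-only use, drop share=True and bind
# the server explicitly, e.g.
#   iface.launch(server_name="127.0.0.1", server_port=7860)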