# Hugging Face Space: Espa AI - Code Assistant
# (this file was recovered from a Space page that was displaying "Runtime error")
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import torch

# Load the model and tokenizer with 4-bit (NF4) quantization so the model
# fits in the limited GPU memory of a Space.
#
# BUG FIX: the original model id "codellama/CodeLlama-3b-hf" does not exist —
# CodeLlama is published only in 7b/13b/34b variants — so from_pretrained()
# raised at startup, which is the Space's "Runtime error". 7b is the smallest
# real checkpoint; 4-bit quantization keeps its footprint manageable.
model_name = "codellama/CodeLlama-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization
    bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate place layers on available devices
)

# Shared text-generation pipeline, reused by every request.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
| # Define the Gradio interface | |
def generate_code(instruction):
    """Generate a code completion for *instruction* with the CodeLlama pipeline.

    Args:
        instruction: The user's coding task, taken from the Gradio textbox.

    Returns:
        Only the newly generated text (the prompt is not echoed back).
    """
    prompt = f"Instruction: {instruction}\nResponse:"
    # max_new_tokens bounds only the *generated* tokens; the original
    # max_length=100 counted the prompt too, so long instructions could
    # silently leave no room for the answer.  return_full_text=False makes
    # the pipeline return just the completion instead of prompt+completion,
    # so the "Generated Code" box no longer repeats the instruction.
    output = generator(
        prompt,
        max_new_tokens=100,
        num_return_sequences=1,
        return_full_text=False,
    )
    return output[0]["generated_text"]
# Build the web UI: one textbox for the task in, one textbox for the code out.
demo = gr.Interface(
    fn=generate_code,
    inputs=gr.Textbox(label="Enter your coding task:"),
    outputs=gr.Textbox(label="Generated Code"),
    title="Espa AI - Code Assistant",
)

# Start the Gradio server (blocks until shutdown).
demo.launch()