"""Gradio demo: natural-language -> Python code via Salesforce CodeGen-2B-mono."""

import re

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load CodeGen (Python-specialised). Done once at startup; each request reuses it.
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen-2B-mono")
model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen-2B-mono")
model.eval()

# CodeGen often keeps generating a brand-new "# Task:" block after finishing
# the requested code; cut the output off at the first such marker.
_NEXT_TASK_RE = re.compile(r"^# Task:", re.MULTILINE)


def _clean_generated_code(full_output: str) -> str:
    """Strip the prompt scaffold and trailing run-on text from raw model output."""
    # Remove the prompt section so that only the generated code remains.
    code_only = full_output.split("# Code:\n", 1)[-1]
    # Truncate at the start of any follow-up task the model invented.
    match = _NEXT_TASK_RE.search(code_only)
    if match:
        code_only = code_only[: match.start()]
    # Defensive: drop any stray special-token markup that survived decoding.
    code_only = re.sub(r"<\|.*?\|>", "", code_only)
    return code_only.strip()


def generate_code(user_request: str) -> str:
    """Produce clean Python code from a natural language instruction.

    Args:
        user_request: Plain-English description of the desired code.

    Returns:
        The generated Python source, with the prompt scaffold and any
        run-on "next task" text removed. Empty string for empty input.
    """
    if not user_request or not user_request.strip():
        return ""

    # A structured prompt works significantly better with CodeGen.
    prompt = (
        "# Task: Write Python code that accomplishes the following:\n"
        f"# {user_request}\n"
        "# Code:\n"
    )
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    # Deterministic decoding avoids messy repetition. max_new_tokens (rather
    # than max_length) guarantees a fixed generation budget regardless of how
    # long the user's prompt is. inference_mode avoids building autograd state.
    with torch.inference_mode():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=256,
            num_beams=4,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            # CodeGen has no pad token; reuse EOS to silence the per-call warning.
            pad_token_id=tokenizer.eos_token_id,
        )

    # skip_special_tokens removes <|endoftext|>-style markers at the source.
    full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return _clean_generated_code(full_output)


with gr.Blocks(title="Code Generation with CodeGen-2B") as demo:
    gr.Markdown(
        """### Code Generation Assistant
Provide a description of the code you need, and the model will return Python code only.
"""
    )
    task = gr.Textbox(
        lines=2,
        label="Task Description",
        placeholder="For example: create a function that prints the first n Fibonacci numbers.",
    )
    output = gr.Code(
        label="Generated Python Code",
        language="python",
    )
    btn = gr.Button("Generate Code")
    btn.click(generate_code, inputs=task, outputs=output)

if __name__ == "__main__":
    demo.launch()