# Hugging Face Spaces status banner ("Spaces: Sleeping") captured by the page
# scrape — not part of the program; kept here only as a comment.
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import re

# Load CodeGen (Python-specialised).
# NOTE(review): this downloads/loads a ~2B-parameter checkpoint at import
# time, so startup is slow and memory-heavy; the model runs on CPU unless
# moved to a device elsewhere — confirm the Space's hardware tier.
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen-2B-mono")
model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen-2B-mono")
def generate_code(user_request):
    """
    Produce clean Python code from a natural-language instruction.

    Parameters
    ----------
    user_request : str
        Plain-English description of the code to generate.

    Returns
    -------
    str
        Generated Python code with the prompt scaffolding and any
        special-token text stripped out; empty string for a blank request.
    """
    # Guard: an empty request would make the model free-associate.
    if not user_request or not user_request.strip():
        return ""

    # A structured prompt works significantly better with CodeGen.
    prompt = (
        "# Task: Write Python code that accomplishes the following:\n"
        f"# {user_request}\n"
        "# Code:\n"
    )
    # Keep the full encoding so generate() receives an explicit attention
    # mask instead of having to infer one.
    encoded = tokenizer(prompt, return_tensors="pt")

    # Deterministic beam search avoids messy repetition. max_new_tokens caps
    # the *generated* text rather than prompt + generation combined (the old
    # max_length starved long requests), and pad_token_id is set explicitly
    # because CodeGen's tokenizer defines no pad token.
    output_ids = model.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
        max_new_tokens=256,
        num_beams=4,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )
    # skip_special_tokens drops EOS/BOS markers at the source rather than
    # relying solely on the regex cleanup below.
    full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Remove the prompt section so that only the generated code remains.
    code_only = full_output.split("# Code:\n", 1)[-1]
    # Strip surrounding whitespace the model sometimes adds.
    code_only = code_only.strip()
    # Remove any stray special-token text that survived decoding.
    code_only = re.sub(r"<\|.*?\|>", "", code_only)
    return code_only
# --- Gradio front-end -------------------------------------------------------
# One text input, one code output, one button wired to generate_code.
with gr.Blocks(title="Code Generation with CodeGen-2B") as demo:
    gr.Markdown(
        """### Code Generation Assistant
Provide a description of the code you need, and the model will return Python code only.
"""
    )
    task_box = gr.Textbox(
        label="Task Description",
        placeholder="For example: create a function that prints the first n Fibonacci numbers.",
        lines=2,
    )
    code_out = gr.Code(
        language="python",
        label="Generated Python Code",
    )
    generate_btn = gr.Button("Generate Code")
    # Clicking the button runs the model and fills the code panel.
    generate_btn.click(generate_code, inputs=task_box, outputs=code_out)

demo.launch()