"""Gradio demo: natural-language -> Python code via Salesforce CodeGen-2B-mono."""

import re

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load CodeGen (Python-specialised). Done once at startup; each request reuses it.
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen-2B-mono")
model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen-2B-mono")
model.eval()

# CodeGen often keeps generating a brand-new "# Task:" block after finishing
# the requested code; cut the output off at the first such marker.
_NEXT_TASK_RE = re.compile(r"^# Task:", re.MULTILINE)


def _clean_generated_code(full_output: str) -> str:
    """Strip the prompt scaffold and trailing run-on text from raw model output."""
    # Remove the prompt section so that only the generated code remains.
    code_only = full_output.split("# Code:\n", 1)[-1]
    # Truncate at the start of any follow-up task the model invented.
    match = _NEXT_TASK_RE.search(code_only)
    if match:
        code_only = code_only[: match.start()]
    # Defensive: drop any stray special-token markup that survived decoding.
    code_only = re.sub(r"<\|.*?\|>", "", code_only)
    return code_only.strip()


def generate_code(user_request: str) -> str:
    """Produce clean Python code from a natural language instruction.

    Args:
        user_request: Plain-English description of the desired code.

    Returns:
        The generated Python source, with the prompt scaffold and any
        run-on "next task" text removed. Empty string for empty input.
    """
    if not user_request or not user_request.strip():
        return ""

    # A structured prompt works significantly better with CodeGen.
    prompt = (
        "# Task: Write Python code that accomplishes the following:\n"
        f"# {user_request}\n"
        "# Code:\n"
    )
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    # Deterministic decoding avoids messy repetition. max_new_tokens (rather
    # than max_length) guarantees a fixed generation budget regardless of how
    # long the user's prompt is. inference_mode avoids building autograd state.
    with torch.inference_mode():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=256,
            num_beams=4,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            # CodeGen has no pad token; reuse EOS to silence the per-call warning.
            pad_token_id=tokenizer.eos_token_id,
        )

    # skip_special_tokens removes <|endoftext|>-style markers at the source.
    full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return _clean_generated_code(full_output)


with gr.Blocks(title="Code Generation with CodeGen-2B") as demo:
    gr.Markdown(
        """### Code Generation Assistant
Provide a description of the code you need, and the model will return Python code only.
"""
    )
    task = gr.Textbox(
        lines=2,
        label="Task Description",
        placeholder="For example: create a function that prints the first n Fibonacci numbers.",
    )
    output = gr.Code(
        label="Generated Python Code",
        language="python",
    )
    btn = gr.Button("Generate Code")
    btn.click(generate_code, inputs=task, outputs=output)

if __name__ == "__main__":
    demo.launch()