|
|
import torch |
|
|
from transformers import GPT2LMHeadModel, GPT2TokenizerFast |
|
|
import gradio as gr |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Base Hugging Face checkpoint the fine-tune started from.
MODEL_NAME = "gpt2"


# Path to the fine-tuned state_dict (SPoC pseudo-code→code weights).
WEIGHTS_PATH = "gpt2_spoc.pt"


# Prefer GPU when available; everything below is moved to this device.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Tokenizer must match the fine-tuning setup: the three markers below were
# added during training to delimit pseudo-code (<|pc|>), generated code
# (<|code|>), and end-of-sample (<|end|>).
tokenizer = GPT2TokenizerFast.from_pretrained(MODEL_NAME)


tokenizer.add_special_tokens({


"additional_special_tokens": ["<|pc|>", "<|code|>", "<|end|>"]


})
|
|
|
|
|
# Load the base GPT-2 LM head model, grow its embedding matrix so the three
# added special tokens have rows, then overwrite with the fine-tuned weights.
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)


model.resize_token_embeddings(len(tokenizer))


# weights_only=True restricts torch.load to tensor deserialization — a
# state_dict contains only tensors, so this is safe and removes the
# arbitrary-code-execution risk of unpickling an untrusted checkpoint.
model.load_state_dict(torch.load(WEIGHTS_PATH, map_location=DEVICE, weights_only=True))


model.to(DEVICE)


# Inference mode: disables dropout so generations are not perturbed.
model.eval()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_code(pseudo: str, max_new_tokens: int = 200) -> str:
    """Translate pseudo-code into code with the fine-tuned GPT-2.

    Builds the same ``<|pc|> ... <|code|>`` prompt used during fine-tuning,
    samples a continuation, and returns only the text between the
    ``<|code|>`` and ``<|end|>`` markers.

    Args:
        pseudo: Free-form pseudo-code entered by the user.
        max_new_tokens: Cap on the number of generated tokens.

    Returns:
        The generated code, or a warning string when the input is blank.
    """
    if not pseudo.strip():
        return "⚠️ Please enter some pseudo-code."

    prompt = f"<|pc|>\n{pseudo.strip()}\n<|code|>\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)

    # Inference only — skip autograd bookkeeping to save time and memory.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Keep special tokens in the decoded text: with skip_special_tokens=True
    # the <|code|> marker is stripped, the split below matches nothing, and
    # the function would echo the user's pseudo-code prompt in its output.
    text = tokenizer.decode(outputs[0], skip_special_tokens=False)
    return text.split("<|code|>")[-1].split("<|end|>")[0].strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: a single textbox in, a syntax-highlighted code panel out,
# with three click-to-run example prompts.
demo = gr.Interface(


fn=generate_code,


inputs=gr.Textbox(lines=10, label="🧠 Enter Pseudo-Code"),


outputs=gr.Code(label="💻 Generated Code", language="python"),


title="Pseudo-Code → Code Generator (GPT-2 SPOC)",


description="Fine-tuned GPT-2 model that converts pseudo-code into working Python code.",


# Each example is a one-element list: the single value for the textbox input.
examples=[


["Read integer n\nRead n integers into a list\nPrint the sum of the list"],


["Input two numbers a and b\nIf a > b, print a else print b"],


["Read a string s\nReverse it and print"]


],


theme="gradio/soft",


)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Start the Gradio server only when run as a script (not on import).
if __name__ == "__main__":


    demo.launch()