File size: 2,516 Bytes
6653e99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285988b
6653e99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import gradio as gr

# ============================================================
# CONFIG
# ============================================================
# Hugging Face identifier of the base checkpoint and tokenizer.
MODEL_NAME = "gpt2"

# Fine-tuned weights file; upload this file to your Space.
WEIGHTS_PATH = "gpt2_spoc.pt"

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# ============================================================
# LOAD MODEL & TOKENIZER
# ============================================================
# Base tokenizer extended with the three marker tokens the prompt format
# relies on (pseudo-code start, code start, end of sample).
tokenizer = GPT2TokenizerFast.from_pretrained(MODEL_NAME)
tokenizer.add_special_tokens(
    {"additional_special_tokens": ["<|pc|>", "<|code|>", "<|end|>"]}
)

model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)
# The embedding matrix must be resized to the enlarged vocabulary *before*
# the weights are loaded; presumably the checkpoint was saved with the same
# three extra tokens -- verify against the training script.
model.resize_token_embeddings(len(tokenizer))

# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
_state_dict = torch.load(WEIGHTS_PATH, map_location=DEVICE)
model.load_state_dict(_state_dict)
del _state_dict  # do not keep a second copy of the weights alive

# Move to the target device and switch to inference mode (disables dropout).
model = model.to(DEVICE)
model.eval()

# ============================================================
# INFERENCE FUNCTION
# ============================================================
def generate_code(pseudo, max_new_tokens=200):
    """Generate code for a pseudo-code snippet with the fine-tuned model.

    The prompt is built as ``<|pc|>\\n{pseudo}\\n<|code|>\\n`` and the model
    is sampled (temperature 0.7, top-p 0.9). Only the newly generated tokens
    are decoded, and the result is cut at the first ``<|end|>`` marker.

    Args:
        pseudo: Pseudo-code text entered by the user. ``None``, empty and
            whitespace-only values are rejected with a warning message.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The generated code as a stripped string, or a warning message when
        no pseudo-code was supplied.
    """
    if not pseudo or not pseudo.strip():
        return "⚠️ Please enter some pseudo-code."

    prompt = f"<|pc|>\n{pseudo.strip()}\n<|code|>\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    prompt_len = inputs["input_ids"].shape[1]

    # Stop as soon as the model emits its own end-of-sample marker instead of
    # always running until <|endoftext|> or the token budget is exhausted.
    end_id = tokenizer.convert_tokens_to_ids("<|end|>")

    outputs = model.generate(
        **inputs,
        max_new_tokens=int(max_new_tokens),
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=[end_id, tokenizer.eos_token_id],
    )

    # Drop the echoed prompt: generate() returns prompt + continuation.
    generated_ids = outputs[0][prompt_len:]

    # Keep special tokens while decoding. The markers are registered as
    # special tokens, so skip_special_tokens=True would delete them and the
    # marker-based splits below could never match.
    text = tokenizer.decode(generated_ids, skip_special_tokens=False)
    text = text.split("<|code|>")[-1].split("<|end|>")[0]
    if tokenizer.eos_token:
        text = text.split(tokenizer.eos_token)[0]
    return text.strip()

# ============================================================
# GRADIO UI
# ============================================================
# Clickable sample prompts; each inner list holds the value for the single
# input component.
_EXAMPLE_PROMPTS = [
    ["Read integer n\nRead n integers into a list\nPrint the sum of the list"],
    ["Input two numbers a and b\nIf a > b, print a else print b"],
    ["Read a string s\nReverse it and print"],
]

# Input and output widgets, built up front so the Interface call stays short.
_pseudo_input = gr.Textbox(lines=10, label="🧠 Enter Pseudo-Code")
_code_output = gr.Code(label="💻 Generated Code", language="python")

# Single-function UI: the textbox content is passed to generate_code and the
# returned string is rendered in the code viewer.
demo = gr.Interface(
    fn=generate_code,
    inputs=_pseudo_input,
    outputs=_code_output,
    examples=_EXAMPLE_PROMPTS,
    title="Pseudo-Code → Code Generator (GPT-2 SPOC)",
    description="Fine-tuned GPT-2 model that converts pseudo-code into working Python code.",
    theme="gradio/soft",
)

# ============================================================
# LAUNCH
# ============================================================
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch the UI.
if __name__ == "__main__":
    demo.launch()