# Hugging Face Spaces app (scraped page header showed status: "Sleeping").
import io
import pickle

import gradio as gr
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# ---------------------------------------------------------------------------
# Model / tokenizer loading (runs once at import time).
#
# NOTE(security): pickle.load executes arbitrary code embedded in the file
# being loaded. Only run this with .pkl files from a trusted source.
# ---------------------------------------------------------------------------
print("Loading model and tokenizer...")

# Prefer GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

try:
    class CPU_Unpickler(pickle.Unpickler):
        """Unpickler that remaps CUDA-saved torch storages onto `device`.

        Needed when the model was pickled on a GPU machine but is being
        unpickled on a CPU-only host.
        """

        def find_class(self, module, name):
            if module == 'torch.storage' and name == '_load_from_bytes':
                # Intercept torch storage deserialization so tensors land on
                # the local device instead of a (possibly absent) GPU.
                return lambda b: torch.load(io.BytesIO(b), map_location=device)
            else:
                return super().find_class(module, name)

    # Load the LoRA-adapted model, remapping devices when on CPU.
    print("Loading LoRA model...")
    with open('gpt2_pseudo2code_lora_model.pkl', 'rb') as f:
        if device == "cpu":
            # Use custom unpickler for CPU
            model = CPU_Unpickler(f).load()
        else:
            model = pickle.load(f)
    print("✓ Model loaded successfully")

    # Load tokenizer
    print("Loading tokenizer...")
    with open('gpt2_pseudo2code_tokenizer.pkl', 'rb') as f:
        tokenizer = pickle.load(f)
    print("✓ Tokenizer loaded successfully")

    # Ensure the model sits on the chosen device and is in inference mode.
    model = model.to(device)
    model.eval()
    print(f"✓ Model ready on {device}")

    # Report parameter counts (LoRA keeps most base weights frozen, so
    # trainable should be a small fraction of total).
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"✓ Total parameters: {total_params:,}")
    print(f"✓ Trainable parameters: {trainable_params:,}")
except Exception as e:
    print(f"Error loading model: {e}")
    print("\nTrying alternative loading method...")
    try:
        # Fallback: the file may be a torch checkpoint rather than a plain
        # pickle; torch.load handles device mapping itself.
        # (The original re-imported io here; it is already imported at the
        # top of the file, so the redundant import was removed.)
        with open('gpt2_pseudo2code_lora_model.pkl', 'rb') as f:
            buffer = io.BytesIO(f.read())
        model = torch.load(buffer, map_location=torch.device(device))
        with open('gpt2_pseudo2code_tokenizer.pkl', 'rb') as f:
            tokenizer = pickle.load(f)
        model = model.to(device)
        model.eval()
        print("✓ Model loaded successfully using alternative method")
    except Exception as e2:
        print(f"Alternative loading also failed: {e2}")
        raise
def generate_code(pseudocode, indent, line, max_length=128, temperature=0.7, top_p=0.9):
    """
    Generate code from pseudo-code with line and indent information.

    Args:
        pseudocode: Input pseudo-code string.
        indent: Indentation level of the target line.
        line: Line number of the target line.
        max_length: Maximum length in tokens of the whole generated
            sequence, prompt included.
        temperature: Sampling temperature; higher is more random.
        top_p: Nucleus-sampling probability threshold.

    Returns:
        Generated code string, or an error message string on failure.
    """
    try:
        # Format the prompt exactly as the model saw during fine-tuning.
        prompt = f"Pseudocode: {pseudocode} | Indent: {indent} | Line: {line}\nCode:"

        # GPT-2 tokenizers have no pad token by default; padding=True raises
        # in that case, so reuse EOS as the pad token when it is unset.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        inputs = tokenizer(prompt, return_tensors='pt', padding=True)

        # Move inputs to the same device the model lives on.
        device = next(model.parameters()).device
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Sample one continuation without tracking gradients.
        model.eval()
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                num_return_sequences=1
            )

        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Keep only the text after the first "Code:" marker. Split at most
        # once: the original split("Code:")[1] silently dropped everything
        # after any second "Code:" occurrence in the model output.
        if "Code:" in generated_text:
            code = generated_text.split("Code:", 1)[1].strip()
        else:
            code = generated_text.strip()
        return code
    except Exception as e:
        return f"Error generating code: {str(e)}"
def gradio_generate_code(pseudocode, indent, line, temperature=0.7, top_p=0.9, max_length=128):
    """
    Validate the Gradio form inputs and delegate to generate_code.

    Returns the generated code, or a user-facing warning/error string.
    """
    # Reject empty or whitespace-only pseudocode before doing any work.
    if not pseudocode.strip():
        return "⚠️ Please enter some pseudocode!"
    try:
        # Coerce numeric fields first so a bad value surfaces as ValueError.
        indent = int(indent)
        line = int(line)
        return generate_code(
            pseudocode,
            indent,
            line,
            max_length=int(max_length),
            temperature=float(temperature),
            top_p=float(top_p)
        )
    except ValueError:
        # int()/float() conversion failed on a non-numeric field.
        return "⚠️ Indent and Line must be valid numbers!"
    except Exception as e:
        return f"❌ Error: {str(e)}"
# Example inputs for the UI, one row per Gradio input:
# (pseudocode, indent, line, temperature, top_p, max_length).
_DEFAULT_SAMPLING = (0.7, 0.9, 128)
examples = [
    ["create integer n", 1, 1, *_DEFAULT_SAMPLING],
    ["read n", 1, 2, *_DEFAULT_SAMPLING],
    ["for i from 0 to n", 1, 3, *_DEFAULT_SAMPLING],
    ["print i", 2, 4, *_DEFAULT_SAMPLING],
    ["if n is equal to 0", 1, 5, *_DEFAULT_SAMPLING],
    ["create string s", 1, 1, *_DEFAULT_SAMPLING],
    ["read s", 1, 2, *_DEFAULT_SAMPLING],
]
# ---------------------------------------------------------------------------
# Gradio interface. NOTE(review): source indentation was lost in extraction;
# the Examples/help sections are reconstructed at Blocks level — confirm
# against the deployed layout.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(), title="Pseudo-Code to Code Generator") as demo:
    # Header / model card.
    gr.Markdown(
        """
# 🐍 Pseudo-Code to Code Generator (GPT-2 + LoRA)
Convert natural language pseudo-code to executable code using a fine-tuned GPT-2 model with LoRA.
**Model Details:**
- Base Model: GPT-2
- Training: SPOC Dataset (C++ code examples)
- Optimization: LoRA (Low-Rank Adaptation) + 16-bit precision
- Trained on: 20,000 pseudo-code to code pairs
**Note:** The model was trained on C++ code examples from the SPOC dataset, so it generates C++-style code.
"""
    )
    with gr.Row():
        # Left column: pseudocode input plus generation parameters.
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Input")
            pseudocode_input = gr.Textbox(
                label="Pseudocode",
                placeholder="Enter your pseudocode here...\nExample: create integer n",
                lines=5,
                max_lines=10
            )
            with gr.Row():
                indent_input = gr.Number(
                    label="Indent Level",
                    value=1,
                    precision=0,
                    info="Indentation level (0=no indent, 1=first level, etc.)"
                )
                line_input = gr.Number(
                    label="Line Number",
                    value=1,
                    precision=0,
                    info="Line number in the program"
                )
            gr.Markdown("### ⚙️ Generation Parameters")
            with gr.Row():
                temperature_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.5,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                    info="Higher = more creative/random"
                )
                top_p_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    label="Top-p (Nucleus Sampling)",
                    info="Probability threshold for sampling"
                )
                max_length_slider = gr.Slider(
                    minimum=64,
                    maximum=256,
                    value=128,
                    step=16,
                    label="Max Length",
                    info="Maximum tokens to generate"
                )
            generate_btn = gr.Button("🚀 Generate Code", variant="primary", size="lg")
        # Right column: generated code output.
        with gr.Column(scale=1):
            gr.Markdown("### 💻 Generated Code")
            output = gr.Textbox(
                label="Generated Code",
                lines=15,
                max_lines=20,
                show_copy_button=True
            )
    # Clickable example rows that fill all six inputs.
    gr.Markdown("### 📚 Examples")
    gr.Examples(
        examples=examples,
        inputs=[pseudocode_input, indent_input, line_input, temperature_slider, top_p_slider, max_length_slider],
        outputs=output,
        fn=gradio_generate_code,
        cache_examples=False,
    )
    # Usage notes and resources.
    gr.Markdown(
        """
---
### ℹ️ How to Use:
1. **Enter pseudocode**: Write your natural language description
2. **Set indent level**: Specify the indentation (0 for no indent, 1 for first level, etc.)
3. **Set line number**: Indicate the line position in your program
4. **Adjust parameters** (optional): Fine-tune temperature and top-p for different results
5. **Click Generate**: Get your code!
### 💡 Tips:
- Higher temperature (0.8-1.2) = more creative but potentially less accurate
- Lower temperature (0.5-0.7) = more conservative and predictable
- Top-p controls diversity; 0.9 is usually a good balance
- The model generates C++-style code as it was trained on the SPOC dataset
### 🔗 Resources:
- [SPOC Dataset](https://github.com/sumith1896/spoc)
- [Research Paper](https://arxiv.org/pdf/1906.04908)
- Model trained with LoRA for efficiency
"""
    )
    # Wire the button to the generation wrapper.
    generate_btn.click(
        fn=gradio_generate_code,
        inputs=[pseudocode_input, indent_input, line_input, temperature_slider, top_p_slider, max_length_slider],
        outputs=output
    )

# Launch the app when run as a script.
if __name__ == "__main__":
    demo.launch()