hamxaameer commited on
Commit
262de9f
·
1 Parent(s): b22edd6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +620 -0
app.py ADDED
@@ -0,0 +1,620 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import pickle
import torch
import numpy as np
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.tokenize import word_tokenize
import nltk
import time
import os

# Download required NLTK data (tokenizer models used by word_tokenize).
# Best-effort: the app can still start offline; tokenization would then
# fail later inside calculate_bleu_score, which returns zeros on error.
try:
    nltk.download('punkt', quiet=True)
    nltk.download('punkt_tab', quiet=True)
except Exception:
    # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
    # are not silently swallowed during startup.
    pass

# Global state shared across Gradio callbacks.
loaded_model = None        # fine-tuned GPT-2 model, set by load_model_from_pickle
loaded_tokenizer = None    # tokenizer matching loaded_model
loaded_config = None       # dict of training metadata from the pickle package
generation_history = []    # list of dicts: {'pseudo', 'generated', 'bleu_4', 'time'}
def load_model_from_pickle(pickle_file):
    """Load a pickled model package and store it in the module globals.

    Args:
        pickle_file: Either a filesystem path string (what
            ``gr.File(type="filepath")`` actually delivers) or a
            tempfile-like object exposing ``.name``. The original code
            handled only the ``.name`` case and raised AttributeError for
            the string path the UI provides.

    Returns:
        A human-readable status string for the "Model Status" textbox:
        a configuration summary on success, an error message on failure.
    """
    global loaded_model, loaded_tokenizer, loaded_config

    try:
        # Resolve the upload to a path regardless of Gradio's file mode.
        path = pickle_file if isinstance(pickle_file, str) else pickle_file.name

        # SECURITY NOTE: pickle.load executes arbitrary code embedded in
        # the file -- only load model files from a trusted source.
        with open(path, 'rb') as f:
            model_package = pickle.load(f)

        loaded_model = model_package['model']
        loaded_tokenizer = model_package['tokenizer']
        loaded_config = model_package['config']

        # Inference-only usage: disable dropout / batch-norm updates.
        loaded_model.eval()

        # Move to GPU when available.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        loaded_model = loaded_model.to(device)

        def _fmt_count(key):
            # Thousands-separated number, or 'N/A' when the key is absent.
            # The original applied ``:,`` directly to the 'N/A' fallback
            # string, which raises ValueError for configs missing the key.
            value = loaded_config.get(key)
            return f"{value:,}" if isinstance(value, (int, float)) else 'N/A'

        config_info = f"""✅ Model loaded successfully!

📊 Model Configuration:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
• Base Model: {loaded_config.get('model_name', 'GPT-2')}
• Training Epochs: {loaded_config.get('num_epochs', 'N/A')}
• Training Samples: {_fmt_count('training_samples')}
• Validation Samples: {_fmt_count('validation_samples')}
• BLEU Score: {loaded_config.get('bleu_score', 0):.4f}
• Perplexity: {loaded_config.get('perplexity', 0):.2f}
• Final Loss: {loaded_config.get('final_loss', 0):.4f}
• Device: {device}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

🚀 Model is ready to generate code!
"""

        return config_info

    except Exception as e:
        return f"❌ Error loading model: {str(e)}\n\nPlease ensure you uploaded the correct best_model.pkl file."
def calculate_bleu_score(reference, hypothesis):
    """Compute smoothed BLEU-1..4 between reference and generated code.

    Both strings are lower-cased and word-tokenized before scoring.
    Returns a tuple (bleu_1, bleu_2, bleu_3, bleu_4); all zeros if
    tokenization or scoring fails for any reason.
    """
    try:
        reference_tokens = [word_tokenize(reference.lower())]
        hypothesis_tokens = word_tokenize(hypothesis.lower())

        # Smoothing avoids zero scores for short sequences with no
        # higher-order n-gram overlap.
        smoother = SmoothingFunction().method1
        weight_sets = (
            (1, 0, 0, 0),
            (0.5, 0.5, 0, 0),
            (0.33, 0.33, 0.33, 0),
            (0.25, 0.25, 0.25, 0.25),
        )
        return tuple(
            sentence_bleu(reference_tokens, hypothesis_tokens,
                          weights=weights, smoothing_function=smoother)
            for weights in weight_sets
        )
    except Exception:
        return 0.0, 0.0, 0.0, 0.0
def calculate_code_metrics(reference, generated):
    """Compute simple similarity metrics between two code strings.

    Returns a dict with keys 'length_ratio', 'precision', 'recall',
    'f1_score' and 'char_overlap'; all zeros on any internal error.
    Word-level scores use whitespace-split, lower-cased vocabularies.
    """
    try:
        # Character-count ratio of generated vs reference (guard against
        # an empty reference).
        length_ratio = len(generated) / max(len(reference), 1)

        ref_vocab = set(reference.lower().split())
        gen_vocab = set(generated.lower().split())
        shared = ref_vocab & gen_vocab

        if ref_vocab:
            precision = len(shared) / len(gen_vocab) if gen_vocab else 0
            recall = len(shared) / len(ref_vocab)
            denominator = precision + recall
            f1 = 2 * (precision * recall) / denominator if denominator > 0 else 0
        else:
            precision = recall = f1 = 0

        # Fraction of generated characters that also occur anywhere in
        # the reference (very coarse character-level signal).
        char_overlap = sum(ch in reference for ch in generated) / max(len(generated), 1)

        return {
            'length_ratio': length_ratio,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'char_overlap': char_overlap,
        }
    except Exception:
        return {
            'length_ratio': 0,
            'precision': 0,
            'recall': 0,
            'f1_score': 0,
            'char_overlap': 0,
        }
def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p, num_sequences, reference_code):
    """Generate Python code from a pseudo-code description.

    Args:
        pseudo_code: Natural-language description to translate.
        max_length, temperature, top_k, top_p: sampling parameters
            (coerced to int/float for the generate() call).
        num_sequences: How many variations to sample.
        reference_code: Optional ground-truth code; when non-blank,
            BLEU and similarity metrics are computed against it.

    Returns:
        A 4-tuple (primary_code, metrics_text, bleu_text, alternatives_md).
        On error the first element carries the message and the rest are "".
    """
    global loaded_model, loaded_tokenizer, generation_history

    if loaded_model is None or loaded_tokenizer is None:
        return "❌ Please upload and load a model first!", "", "", ""

    if not pseudo_code.strip():
        return "❌ Please enter pseudo-code description!", "", "", ""

    try:
        start_time = time.time()

        # Prompt format used during fine-tuning.
        prompt = f"<PSEUDO> {pseudo_code.strip()} <SEP> <CODE>"

        # Tokenize on the same device the model lives on.
        device = next(loaded_model.parameters()).device
        inputs = loaded_tokenizer(prompt, return_tensors='pt').to(device)

        # Generate (ensure type safety for parameters coming from sliders).
        with torch.no_grad():
            outputs = loaded_model.generate(
                **inputs,
                max_length=int(max_length),
                temperature=float(temperature),
                top_k=int(top_k),
                top_p=float(top_p),
                do_sample=True,
                num_return_sequences=int(num_sequences),
                pad_token_id=loaded_tokenizer.pad_token_id,
                eos_token_id=loaded_tokenizer.eos_token_id,
            )

        generation_time = time.time() - start_time

        # Decode all sampled sequences and strip the prompt/special tokens.
        generated_codes = []
        for output in outputs:
            generated = loaded_tokenizer.decode(output, skip_special_tokens=False)

            if '<CODE>' in generated:
                code = generated.split('<CODE>')[-1].strip()
                code = code.replace('<PAD>', '').replace('<SEP>', '').strip()
            else:
                code = generated

            generated_codes.append(code)

        # Use the first generated code as primary output.
        primary_code = generated_codes[0]

        metrics_output = ""
        bleu_output = ""

        # BUGFIX: the original checked `reference_code and reference_code.strip()`
        # here but bare `reference_code` when building the history entry, so a
        # whitespace-only reference raised NameError on the never-assigned
        # bleu_4. Normalize the check once and pre-initialize bleu_4.
        has_reference = bool(reference_code and reference_code.strip())
        bleu_4 = None

        if has_reference:
            bleu_1, bleu_2, bleu_3, bleu_4 = calculate_bleu_score(reference_code, primary_code)

            bleu_output = f"""📊 BLEU Scores:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
• BLEU-1 (Unigram): {bleu_1:.4f} ({bleu_1*100:.2f}%)
• BLEU-2 (Bigram): {bleu_2:.4f} ({bleu_2*100:.2f}%)
• BLEU-3 (Trigram): {bleu_3:.4f} ({bleu_3*100:.2f}%)
• BLEU-4 (4-gram): {bleu_4:.4f} ({bleu_4*100:.2f}%)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

💡 Interpretation:
• BLEU > 0.4: Excellent match
• BLEU 0.3-0.4: Good match
• BLEU 0.2-0.3: Fair match
• BLEU < 0.2: Poor match
"""

            code_metrics = calculate_code_metrics(reference_code, primary_code)

            metrics_output = f"""📈 Additional Metrics:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
• Length Ratio: {code_metrics['length_ratio']:.3f}
• Precision: {code_metrics['precision']:.4f} ({code_metrics['precision']*100:.2f}%)
• Recall: {code_metrics['recall']:.4f} ({code_metrics['recall']*100:.2f}%)
• F1-Score: {code_metrics['f1_score']:.4f} ({code_metrics['f1_score']*100:.2f}%)
• Character Overlap: {code_metrics['char_overlap']:.4f} ({code_metrics['char_overlap']*100:.2f}%)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

⏱️ Generation Time: {generation_time:.2f}s
📝 Sequences Generated: {num_sequences}
🔢 Output Length: {len(primary_code)} characters
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
"""
        else:
            metrics_output = f"""⏱️ Generation Time: {generation_time:.2f}s
📝 Sequences Generated: {num_sequences}
🔢 Output Length: {len(primary_code)} characters

💡 Tip: Provide reference code to see BLEU scores and similarity metrics!
"""

        # Format alternative sequences as Markdown.
        alternatives = ""
        if num_sequences > 1:
            alternatives = "🔄 Alternative Generations:\n" + "━"*50 + "\n\n"
            for i, code in enumerate(generated_codes[1:], 2):
                alternatives += f"Variation {i}:\n```python\n{code}\n```\n\n"

        # Record this run for the History tab.
        generation_history.append({
            'pseudo': pseudo_code,
            'generated': primary_code,
            'bleu_4': bleu_4,
            'time': generation_time
        })

        return primary_code, metrics_output, bleu_output, alternatives

    except Exception as e:
        return f"❌ Error generating code: {str(e)}", "", "", ""
def show_examples(example_name):
    """Map a dropdown example name to its canned pseudo-code prompt.

    Unknown (or None) selections return an empty string so the input
    textbox is simply cleared instead of raising.
    """
    prompt_catalog = {
        "Basic Loop": "create a list of numbers from 1 to 10",
        "Function Definition": "define a function to calculate the sum of two numbers",
        "List Iteration": "iterate through a list and print each element",
        "Conditional Check": "check if a number is even or odd",
        "Sorting": "sort a list in descending order",
        "Maximum Element": "create a function to find maximum element in array",
        "Binary Search": "implement binary search algorithm",
        "Factorial": "create a recursive function to calculate factorial",
        "Palindrome": "check if a string is palindrome",
        "Fibonacci": "generate fibonacci sequence up to n terms",
    }
    return prompt_catalog.get(example_name, "")
def clear_all():
    """Reset the generation UI.

    Returns values in component order: pseudo_input, reference_code,
    code_output, metrics_output, bleu_output (all cleared), followed by
    the slider defaults for max_length, temperature, top_k, top_p and
    num_sequences.
    """
    cleared_text = ("",) * 5
    slider_defaults = (150, 0.7, 50, 0.95, 1)
    return cleared_text + slider_defaults
def show_history():
    """Render up to the 10 most recent generations, newest first."""
    if not generation_history:
        return "No generation history yet. Start generating code!"

    parts = ["📜 Generation History:\n" + "="*60 + "\n\n"]

    recent = generation_history[-10:]
    for position, entry in enumerate(reversed(recent), 1):
        chunk = f"{position}. Pseudo: {entry['pseudo'][:60]}...\n"
        chunk += f" Time: {entry['time']:.2f}s"
        if entry['bleu_4'] is not None:
            chunk += f" | BLEU-4: {entry['bleu_4']:.4f}"
        chunk += f"\n Code: {entry['generated'][:80]}...\n\n"
        parts.append(chunk)

    return "".join(parts)
# Create Gradio interface with custom CSS.
# These selectors match elem_classes set on components below
# (.output-code on the generated-code box, .metrics-box on the metric
# textboxes); .gradio-container styles the whole app shell.
custom_css = """
.gradio-container {
font-family: 'Arial', sans-serif;
}
.output-code {
font-family: 'Courier New', monospace;
font-size: 14px;
}
.metrics-box {
background-color: #f0f8ff;
border-radius: 8px;
padding: 10px;
}
"""
# UI layout: three tabs (generation, guide, history) plus the event wiring.
# NOTE(review): the scraped source lost all indentation; nesting below is
# reconstructed from the with-block structure — confirm against the deployed app.
with gr.Blocks(title="🚀 GPT-2 Pseudo-Code to Code Generator", theme=gr.themes.Soft(), css=custom_css) as demo:

    gr.Markdown("""
# 🚀 GPT-2 Pseudo-Code to Python Code Generator

**Transform natural language descriptions into executable Python code using fine-tuned GPT-2!**

This model is trained on the SPOC (Search-based Pseudo-code to Code) dataset and can generate Python code from pseudo-code descriptions.
""")

    with gr.Tabs():
        # Tab 1: Code Generation
        with gr.Tab("💻 Code Generation"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 📥 Step 1: Load Model")
                    # type="filepath" delivers a plain str path to the callback.
                    model_file = gr.File(
                        label="Upload best_model.pkl",
                        file_types=[".pkl"],
                        type="filepath"
                    )
                    load_btn = gr.Button("🔄 Load Model", variant="primary", size="lg")
                    model_status = gr.Textbox(
                        label="Model Status",
                        lines=15,
                        interactive=False,
                        placeholder="Upload model file and click 'Load Model'..."
                    )

            gr.Markdown("---")

            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### ✍️ Step 2: Enter Pseudo-Code")

                    # Example selector: choices mirror the keys of the
                    # dict inside show_examples().
                    with gr.Row():
                        example_dropdown = gr.Dropdown(
                            choices=["Basic Loop", "Function Definition", "List Iteration",
                                     "Conditional Check", "Sorting", "Maximum Element",
                                     "Binary Search", "Factorial", "Palindrome", "Fibonacci"],
                            label="📚 Load Example",
                            value=None
                        )

                    pseudo_input = gr.Textbox(
                        label="Pseudo-Code Description",
                        placeholder="Example: create a function to calculate factorial of a number",
                        lines=4
                    )

                    # NOTE(review): gr.Code may not accept placeholder=/lines=
                    # in all Gradio versions — verify against the pinned version.
                    reference_code = gr.Code(
                        label="Reference Code (Optional - for BLEU score calculation)",
                        language="python",
                        lines=4,
                        placeholder="Paste reference code here to calculate BLEU scores..."
                    )

                    gr.Markdown("### ⚙️ Generation Parameters")
                    with gr.Row():
                        max_length = gr.Slider(
                            minimum=50,
                            maximum=500,
                            value=150,
                            step=10,
                            label="Max Length",
                            info="Maximum tokens to generate"
                        )
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.5,
                            value=0.7,
                            step=0.1,
                            label="Temperature",
                            info="Higher = more creative"
                        )

                    with gr.Row():
                        top_k = gr.Slider(
                            minimum=10,
                            maximum=100,
                            value=50,
                            step=5,
                            label="Top-K",
                            info="Vocabulary filtering"
                        )
                        top_p = gr.Slider(
                            minimum=0.5,
                            maximum=1.0,
                            value=0.95,
                            step=0.05,
                            label="Top-P",
                            info="Nucleus sampling"
                        )

                    num_sequences = gr.Slider(
                        minimum=1,
                        maximum=5,
                        value=1,
                        step=1,
                        label="Number of Variations",
                        info="Generate multiple versions"
                    )

                    with gr.Row():
                        generate_btn = gr.Button("✨ Generate Code", variant="primary", size="lg")
                        clear_btn = gr.Button("🗑️ Clear All", variant="secondary")

                with gr.Column(scale=1):
                    gr.Markdown("### 💻 Generated Python Code")
                    code_output = gr.Code(
                        label="Generated Code",
                        language="python",
                        lines=12,
                        elem_classes="output-code"
                    )

                    with gr.Row():
                        with gr.Column():
                            metrics_output = gr.Textbox(
                                label="📊 Performance Metrics",
                                lines=8,
                                interactive=False,
                                elem_classes="metrics-box"
                            )
                        with gr.Column():
                            bleu_output = gr.Textbox(
                                label="🎯 BLEU Scores",
                                lines=8,
                                interactive=False,
                                elem_classes="metrics-box"
                            )

                    alternatives_output = gr.Markdown(
                        label="🔄 Alternative Generations"
                    )

        # Tab 2: Information & Guide (static documentation only)
        with gr.Tab("📖 Guide & Examples"):
            gr.Markdown("""
## 📚 How to Use

### 1️⃣ Load Your Model
- Upload the `best_model.pkl` file (trained GPT-2 model)
- Click "Load Model" and wait for confirmation
- You'll see model configuration and training metrics

### 2️⃣ Generate Code
- **Quick Start**: Select an example from the dropdown
- **Custom Input**: Type your own pseudo-code description
- **Optional**: Add reference code to calculate BLEU scores
- Adjust generation parameters for different outputs
- Click "Generate Code"

### 3️⃣ Understand the Metrics

#### 🎯 BLEU Score (Bilingual Evaluation Understudy)
- Measures similarity between generated and reference code
- **BLEU-1**: Word-level similarity (unigrams)
- **BLEU-2**: 2-word phrase similarity (bigrams)
- **BLEU-3**: 3-word phrase similarity (trigrams)
- **BLEU-4**: 4-word phrase similarity (most comprehensive)

**Score Interpretation:**
- 🟢 **> 0.4**: Excellent match - Generated code is very similar to reference
- 🟡 **0.3-0.4**: Good match - Code captures most key elements
- 🟠 **0.2-0.3**: Fair match - Some similarity exists
- 🔴 **< 0.2**: Poor match - Significant differences

#### 📈 Additional Metrics
- **Precision**: How many generated words appear in reference
- **Recall**: How many reference words appear in generated code
- **F1-Score**: Harmonic mean of precision and recall
- **Length Ratio**: Generated vs reference code length
- **Character Overlap**: Character-level similarity

### 🎛️ Generation Parameters

| Parameter | Low Value | High Value | Use Case |
|-----------|-----------|------------|----------|
| **Temperature** | 0.1-0.3 | 0.8-1.2 | Low: Deterministic, focused<br>High: Creative, diverse |
| **Top-K** | 10-30 | 60-100 | Low: Conservative choices<br>High: More variety |
| **Top-P** | 0.5-0.8 | 0.9-1.0 | Low: Safe predictions<br>High: Exploratory |
| **Max Length** | 50-100 | 200-500 | Short: Simple code<br>Long: Complex implementations |

---

## 💡 Example Pseudo-Code Prompts

### Basic Operations
```
create a list of numbers from 1 to 10
define a function to calculate the sum of two numbers
iterate through a list and print each element
```

### Conditionals & Logic
```
check if a number is even or odd
find the maximum of three numbers
validate if a string is empty
```

### Data Structures
```
sort a list in descending order
remove duplicates from a list
merge two dictionaries
```

### Algorithms
```
implement binary search algorithm
create a recursive function to calculate factorial
generate fibonacci sequence up to n terms
check if a string is palindrome
```

### Advanced
```
create a class to represent a student with name and grades
implement a function to read CSV file and return dataframe
create a decorator to measure function execution time
```

---

## 🎓 About the Model

This model is fine-tuned on the **SPOC (Search-based Pseudo-code to Code)** dataset:
- 📄 Paper: [SPOC: Search-based Pseudo-code to Code](https://arxiv.org/pdf/1906.04908)
- 🏛️ Source: Stanford University
- 🤖 Base Model: GPT-2 (Decoder-Only Transformer)
- 📊 Training: 10,000+ pseudo-code to code pairs
- 🎯 Task: Causal Language Modeling

---

## ⚠️ Limitations

- Model may not handle very complex algorithms perfectly
- Generated code should be tested before production use
- Best results with clear, specific pseudo-code descriptions
- Model trained on C++ code, adapted for Python generation

---

## 🤝 Tips for Best Results

1. ✅ **Be Specific**: "create a function to sort list in ascending order" vs "sort list"
2. ✅ **Use Action Words**: "create", "define", "implement", "calculate"
3. ✅ **Mention Data Types**: "list", "string", "dictionary", "integer"
4. ✅ **Include Details**: "recursive function" vs just "function"
5. ✅ **Try Variations**: Generate multiple times with different temperatures

""")

        # Tab 3: History (refreshed on demand via show_history)
        with gr.Tab("📜 History"):
            gr.Markdown("## 📊 Generation History")
            history_display = gr.Textbox(
                label="Recent Generations",
                lines=20,
                interactive=False
            )
            refresh_history_btn = gr.Button("🔄 Refresh History", variant="secondary")

            gr.Markdown("""
---
### 🌟 Features
- ✅ Upload and use custom trained models
- ✅ BLEU score calculation for quality assessment
- ✅ Multiple evaluation metrics (Precision, Recall, F1)
- ✅ Generate multiple code variations
- ✅ Real-time performance tracking
- ✅ Example prompts library
- ✅ Generation history

### 📝 Citation
If you use this model, please cite:
```
@article{kulal2019spoc,
title={SPOC: Search-based Pseudo-code to Code},
author={Kulal, Sumith and Pasupat, Panupong and Chandra, Kartik and Lee, Mina and Padon, Oded and Aiken, Alex and Liang, Percy},
journal={arXiv preprint arXiv:1906.04908},
year={2019}
}
```

**Built with ❤️ using HuggingFace Transformers & Gradio**
""")

    # Event handlers (must be registered inside the Blocks context).
    load_btn.click(
        fn=load_model_from_pickle,
        inputs=[model_file],
        outputs=[model_status]
    )

    example_dropdown.change(
        fn=show_examples,
        inputs=[example_dropdown],
        outputs=[pseudo_input]
    )

    generate_btn.click(
        fn=generate_code_from_pseudo,
        inputs=[pseudo_input, max_length, temperature, top_k, top_p, num_sequences, reference_code],
        outputs=[code_output, metrics_output, bleu_output, alternatives_output]
    )

    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[pseudo_input, reference_code, code_output, metrics_output, bleu_output,
                 max_length, temperature, top_k, top_p, num_sequences]
    )

    refresh_history_btn.click(
        fn=show_history,
        inputs=[],
        outputs=[history_display]
    )
# Launch the interface when run as a script; share=False keeps the
# server local (no public Gradio tunnel).
if __name__ == "__main__":
    demo.launch(share=False)