Ayush committed on
Commit
8f40d24
·
1 Parent(s): f67957d

Added the code files

Browse files
Files changed (8) hide show
  1. Dockerfile +20 -0
  2. Procfile +1 -0
  3. app.py +127 -0
  4. main.py +107 -0
  5. model.pt +3 -0
  6. requirements.txt +3 -0
  7. templates/index.html +183 -0
  8. vocab.pkl +3 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container first so the pip-install
# layer is cached when only application code changes
COPY requirements.txt .

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application's code
COPY . .

# Expose the port the app runs on (must match the gunicorn bind below)
EXPOSE 7860

# Command to run the app using Gunicorn (serves the Flask `app` in app.py)
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: gunicorn app:app
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import pickle
4
+ from flask import Flask, request, jsonify, render_template
5
+
6
+ # --- Part 1: Re-define the Model Architecture ---
7
+ # This class definition must be EXACTLY the same as in your training script.
8
class ResidualLSTMModel(nn.Module):
    """Two stacked single-layer LSTMs whose outputs are summed (a residual
    skip connection) before dropout and a linear projection to vocab logits.

    NOTE: this definition must stay identical to the training-time one,
    because the checkpoint is loaded as a whole pickled model object.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, dropout_prob):
        super().__init__()
        # padding_idx=0 keeps the <PAD> embedding frozen at zero.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
            padding_idx=0,
        )
        self.lstm1 = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_units,
            num_layers=1,
            batch_first=True,
        )
        self.lstm2 = nn.LSTM(
            input_size=hidden_units,
            hidden_size=hidden_units,
            num_layers=1,
            batch_first=True,
        )
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(hidden_units, vocab_size)

    def forward(self, x):
        """Map (batch, seq_len) token ids to (batch, seq_len, vocab_size) logits."""
        emb = self.embedding(x)
        first_out, _ = self.lstm1(emb)
        second_out, _ = self.lstm2(first_out)
        # Residual connection: element-wise sum of both LSTM outputs.
        combined = first_out + second_out
        return self.fc(self.dropout(combined))
39
+
40
+ # --- Part 2: Helper Functions for Processing Text ---
41
def text_to_sequence(text, vocab, max_length):
    """Tokenize *text* on whitespace and return a (1, max_length) LongTensor.

    Unknown tokens map to the <UNK> id (falling back to 1) and the sequence
    is truncated to max_length, then right-padded with the <PAD> id
    (falling back to 0).
    """
    unk_id = vocab.get('<UNK>', 1)
    pad_id = vocab.get('<PAD>', 0)
    ids = [vocab.get(tok, unk_id) for tok in text.split()][:max_length]
    ids.extend([pad_id] * (max_length - len(ids)))
    return torch.tensor([ids], dtype=torch.long)
50
+
51
def sequence_to_text(sequence, vocab):
    """Map token-id tensors back to tokens, skipping <PAD>, joined by spaces."""
    inverse = {idx: tok for tok, idx in vocab.items()}
    pad_id = vocab.get('<PAD>', 0)
    words = []
    for token_id in sequence:
        value = token_id.item()
        if value != pad_id:
            # Ids absent from the vocabulary render as the unknown marker.
            words.append(inverse.get(value, '<UNK>'))
    return " ".join(words)
55
+
56
+ # --- Part 3: Main Prediction Logic ---
57
def predict_next_tokens(model, text, vocab, device, max_length=1000, top_k=5):
    """Return the top_k most likely next tokens for *text* as a list of strings.

    Returns [] for empty input. The input is whitespace-tokenized, encoded
    via text_to_sequence, and the logits at the last real token position are
    ranked with torch.topk.
    """
    tokens = text.split()
    if not tokens:
        return []
    model.eval()
    with torch.no_grad():
        input_tensor = text_to_sequence(text, vocab, max_length).to(device)
        logits = model(input_tensor)
        # text_to_sequence truncates to max_length, so clamp the index of the
        # "last real token"; the original `num_input_tokens - 1` indexed past
        # the sequence (IndexError) whenever len(tokens) > max_length.
        last_pos = min(len(tokens), max_length) - 1
        last_token_logits = logits[0, last_pos, :]
        _, top_k_ids = torch.topk(last_token_logits, top_k, dim=-1)
        return [sequence_to_text([token_id], vocab) for token_id in top_k_ids]
69
+
70
# --- Part 4: Flask App Initialization ---
app = Flask(__name__)

# --- Configuration and Model Loading ---
# Paths are relative to the working directory (/app inside the Docker image).
MODEL_PATH = 'model.pt'
VOCAB_PATH = 'vocab.pkl'
MAX_LENGTH = 1000  # presumably the training-time sequence length -- TODO confirm

device = torch.device("cpu") # Use CPU for inference on a typical web server

# Load vocabulary (a pickled token -> id mapping).
try:
    with open(VOCAB_PATH, 'rb') as f:
        vocab = pickle.load(f)
    print("Vocabulary loaded.")
except FileNotFoundError:
    # Leave vocab as None; /predict reports a 500 until it is available.
    print(f"Error: Vocabulary file not found at {VOCAB_PATH}")
    vocab = None

# Load the model
try:
    # Since the model was saved as a whole object, we need weights_only=False
    # NOTE(review): weights_only=False unpickles arbitrary objects -- only
    # load checkpoints from a trusted source.
    model = torch.load(MODEL_PATH, map_location=device, weights_only=False)
    model.eval() # Set model to evaluation mode
    print("Model loaded.")
except FileNotFoundError:
    print(f"Error: Model file not found at {MODEL_PATH}")
    model = None
except Exception as e:
    # Broad catch so the web app still starts and can report the failure.
    print(f"An error occurred while loading the model: {e}")
    model = None
101
+
102
+
103
+ # --- Flask Routes ---
104
@app.route('/')
def home():
    """Serve the single-page UI from templates/index.html."""
    return render_template('index.html')
107
+
108
@app.route('/predict', methods=['POST'])
def predict():
    """Return {'suggestions': [...]} for a JSON body of the form {'code': str}.

    Responds 500 with an 'error' key when the model/vocab failed to load at
    startup or when prediction raises; an empty/whitespace snippet yields
    {'suggestions': []}.
    """
    # Explicit None checks: truthiness of a loaded nn.Module (or of a dict
    # that happens to be empty) is not a reliable "loaded" signal.
    if model is None or vocab is None:
        return jsonify({'error': 'Model or vocabulary not loaded. Check server logs.'}), 500

    # silent=True makes get_json() return None instead of failing on a
    # missing or malformed JSON body; fall back to an empty dict.
    data = request.get_json(silent=True) or {}
    code_snippet = data.get('code', '')

    if not code_snippet.strip():
        return jsonify({'suggestions': []})

    try:
        suggestions = predict_next_tokens(model, code_snippet, vocab, device, max_length=MAX_LENGTH)
        return jsonify({'suggestions': suggestions})
    except Exception as e:
        # Log server-side; the client only sees a generic failure message.
        print(f"Prediction error: {e}")
        return jsonify({'error': 'Failed to get prediction.'}), 500
125
+
126
if __name__ == '__main__':
    # Local development server only; the Docker image runs gunicorn instead.
    # NOTE(review): debug=True enables the Werkzeug debugger -- never expose
    # this mode on a public interface.
    app.run(debug=True)
main.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import pickle
4
+
5
+ # --- Part 1: Re-define the Model Architecture ---
6
+ # This class definition must be EXACTLY the same as in your training script.
7
+
8
class ResidualLSTMModel(nn.Module):
    """Residual two-layer LSTM language model.

    The outputs of two stacked single-layer LSTMs are added together (skip
    connection), passed through dropout, then projected to vocabulary logits.
    Must match the training-time definition exactly, since the checkpoint is
    a fully pickled model object.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, dropout_prob):
        super(ResidualLSTMModel, self).__init__()
        # Id 0 is reserved for <PAD>; its embedding row stays zero.
        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embedding_dim,
                                      padding_idx=0)
        self.lstm1 = nn.LSTM(input_size=embedding_dim,
                             hidden_size=hidden_units,
                             num_layers=1,
                             batch_first=True)
        self.lstm2 = nn.LSTM(input_size=hidden_units,
                             hidden_size=hidden_units,
                             num_layers=1,
                             batch_first=True)
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(hidden_units, vocab_size)

    def forward(self, x):
        """x: (batch, seq_len) token ids -> (batch, seq_len, vocab_size) logits."""
        embedded = self.embedding(x)
        lower, _ = self.lstm1(embedded)
        upper, _ = self.lstm2(lower)
        skip_sum = lower + upper  # residual connection
        return self.fc(self.dropout(skip_sum))
39
+
40
+ # --- Part 2: Helper Functions for Processing Text ---
41
+
42
def text_to_sequence(text, vocab, max_length):
    """Converts a string of code into a (1, max_length) padded LongTensor.

    Unknown tokens map to the <UNK> id and short sequences are right-padded
    with the <PAD> id. Uses .get() fallbacks (1 and 0) so a vocabulary that
    lacks the special tokens cannot raise KeyError -- consistent with the
    serving-side helper in app.py.
    """
    tokens = text.split()
    unk_id = vocab.get('<UNK>', 1)
    numericalized = [vocab.get(token, unk_id) for token in tokens]

    # Truncate to the model's maximum sequence length.
    if len(numericalized) > max_length:
        numericalized = numericalized[:max_length]

    pad_id = vocab.get('<PAD>', 0)
    padded = numericalized + [pad_id] * (max_length - len(numericalized))

    return torch.tensor([padded], dtype=torch.long)
55
+
56
def sequence_to_text(sequence, vocab):
    """Converts an iterable of token-id tensors back into a space-joined string.

    <PAD> ids are skipped and unknown ids render as '<UNK>'. Uses a .get()
    fallback for the <PAD> id so a vocabulary without the special token does
    not raise KeyError -- consistent with the serving-side helper in app.py.
    """
    id_to_token = {id_val: token for token, id_val in vocab.items()}
    pad_id = vocab.get('<PAD>', 0)
    tokens = [id_to_token.get(id_val.item(), '<UNK>')
              for id_val in sequence if id_val.item() != pad_id]
    return " ".join(tokens)
61
+
62
+ # --- Part 3: Main Prediction Logic ---
63
+
64
def predict_next_tokens(model, text, vocab, device, max_length=1000, top_k=5):
    """Predicts the top_k next tokens for a given text input.

    Returns a list of up to top_k token strings, or [] for empty input.
    """
    tokens = text.split()
    # Guard against empty input: without it, index -1 below would silently
    # read the logits at the final *padded* position (app.py has this check).
    if not tokens:
        return []

    model.eval()
    with torch.no_grad():
        input_tensor = text_to_sequence(text, vocab, max_length).to(device)
        logits = model(input_tensor)

        # text_to_sequence truncates to max_length, so clamp the index of the
        # last real token; `len(tokens) - 1` alone can index out of bounds.
        last_pos = min(len(tokens), max_length) - 1
        last_token_logits = logits[0, last_pos, :]

        _, top_k_ids = torch.topk(last_token_logits, top_k, dim=-1)
        top_k_tokens = [sequence_to_text([token_id], vocab) for token_id in top_k_ids]

        return top_k_tokens
78
+
79
if __name__ == '__main__':
    # --- Configuration ---
    MODEL_PATH = 'model.pt'
    VOCAB_PATH = 'vocab.pkl' # <-- Updated to use .pkl
    MAX_LENGTH = 1000  # presumably the training-time sequence length -- TODO confirm

    # --- Load everything ---
    # Prefer GPU when available; map_location below moves weights accordingly.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Load vocabulary using pickle
    with open(VOCAB_PATH, 'rb') as f: # <-- Use 'rb' for reading bytes
        vocab = pickle.load(f)
    print("Vocabulary loaded.")

    # Load the model
    # NOTE(review): weights_only=False unpickles arbitrary objects -- only
    # load checkpoints from a trusted source.
    model = torch.load(MODEL_PATH, map_location=device , weights_only=False)
    print("Model loaded.")

    # --- Make a Prediction ---
    input_code = "import numpy as" # Example input

    print(f"\nInput code: '{input_code}'")

    suggestions = predict_next_tokens(model, input_code, vocab, device, max_length=MAX_LENGTH)

    print("\nTop 5 suggestions:")
    for i, suggestion in enumerate(suggestions):
        print(f"{i+1}. {suggestion}")
model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85ba12ee7eccdd7aed642f5dbcd46094cd5f32c501e3c237fe1d4e85ea11ac00
3
+ size 45484701
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Flask
2
+ torch
3
+ gunicorn
templates/index.html ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Code Completion AI</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <style>
9
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
10
+ html {
11
+ scroll-behavior: smooth;
12
+ }
13
+ body {
14
+ font-family: 'Inter', sans-serif;
15
+ background-color: #0f172a; /* slate-900 */
16
+ background-image: radial-gradient(circle at 1px 1px, rgba(255,255,255,0.05) 1px, transparent 0);
17
+ background-size: 2rem 2rem;
18
+ }
19
+ .suggestion-item {
20
+ transition: all 0.2s ease-in-out;
21
+ }
22
+ .info-card-grid {
23
+ display: grid;
24
+ grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
25
+ gap: 1rem;
26
+ }
27
+ </style>
28
+ </head>
29
+ <body class="text-gray-200 min-h-screen flex flex-col items-center justify-center p-4">
30
+
31
+ <main class="w-full max-w-5xl mx-auto grid grid-cols-1 lg:grid-cols-5 gap-8 lg:gap-12">
32
+
33
+ <!-- Left Column: Interaction -->
34
+ <div class="lg:col-span-3 bg-slate-900/50 backdrop-blur-sm border border-slate-700 rounded-2xl shadow-2xl p-6 md:p-8">
35
+ <div class="text-left mb-8">
36
+ <h1 class="text-4xl font-bold text-white tracking-tight">Code Completion AI</h1>
37
+ <p class="text-slate-400 mt-2">Enter a Python code snippet to get AI-powered suggestions.</p>
38
+ </div>
39
+
40
+ <div>
41
+ <label for="code-input" class="block text-sm font-medium text-slate-300 mb-2">Python Snippet</label>
42
+ <textarea id="code-input"
43
+ class="w-full h-48 p-4 bg-slate-900 border border-slate-700 rounded-lg text-slate-200 focus:ring-2 focus:ring-sky-500 focus:border-sky-500 transition duration-200 resize-none font-mono text-sm"
44
+ placeholder="e.g., import numpy as"></textarea>
45
+ </div>
46
+
47
+ <div class="mt-6 text-left">
48
+ <button id="predict-btn"
49
+ class="bg-sky-600 hover:bg-sky-700 text-white font-bold py-3 px-6 rounded-lg transition duration-300 ease-in-out transform hover:scale-105 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-offset-slate-900 focus:ring-sky-500 flex items-center justify-center">
50
+ <span id="btn-text">Get Suggestions</span>
51
+ <span id="spinner" class="hidden">
52
+ <svg class="animate-spin h-5 w-5 text-white" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
53
+ <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
54
+ <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
55
+ </svg>
56
+ </span>
57
+ </button>
58
+ </div>
59
+
60
+ <div id="results-container" class="mt-8">
61
+ <h2 class="text-lg font-semibold text-white mb-3">Top 5 Suggestions</h2>
62
+ <div id="suggestions" class="bg-slate-900 p-3 rounded-lg min-h-[160px] border border-slate-700 space-y-1">
63
+ <p id="placeholder-text" class="text-slate-500 p-2">Suggestions will appear here...</p>
64
+ </div>
65
+ </div>
66
+ </div>
67
+
68
+ <!-- Right Column: Model Info -->
69
+ <div class="lg:col-span-2 space-y-6">
70
+ <div class="bg-slate-900/50 backdrop-blur-sm border border-slate-700 rounded-2xl shadow-xl p-6">
71
+ <h3 class="text-2xl font-bold text-white mb-4">Model Details</h3>
72
+ <p class="text-slate-400 mb-6">
73
+ This app uses a <span class="text-sky-400 font-semibold">Residual LSTM</span> model with two LSTM layers and a skip connection. It was trained on the Python subset of the <span class="text-sky-400">CodeXGlue</span> dataset to predict the next token in a sequence.
74
+ </p>
75
+ <div class="info-card-grid">
76
+ <div class="bg-slate-800 p-4 rounded-lg border border-slate-700">
77
+ <p class="text-sm text-slate-400">Top-5 Accuracy</p>
78
+ <p class="text-xl font-semibold text-white">86.82%</p>
79
+ </div>
80
+ <div class="bg-slate-800 p-4 rounded-lg border border-slate-700">
81
+ <p class="text-sm text-slate-400">Perplexity</p>
82
+ <p class="text-xl font-semibold text-white">4.19</p>
83
+ </div>
84
+ <div class="bg-slate-800 p-4 rounded-lg border border-slate-700">
85
+ <p class="text-sm text-slate-400">Embedding Dim</p>
86
+ <p class="text-xl font-semibold text-white">256</p>
87
+ </div>
88
+ <div class="bg-slate-800 p-4 rounded-lg border border-slate-700">
89
+ <p class="text-sm text-slate-400">Hidden Units</p>
90
+ <p class="text-xl font-semibold text-white">512</p>
91
+ </div>
92
+ <div class="bg-slate-800 p-4 rounded-lg border border-slate-700">
93
+ <p class="text-sm text-slate-400">Vocab Size</p>
94
+ <p class="text-xl font-semibold text-white">10,002</p>
95
+ </div>
96
+ <div class="bg-slate-800 p-4 rounded-lg border border-slate-700">
97
+ <p class="text-sm text-slate-400">Parameters</p>
98
+ <p class="text-xl font-semibold text-white">~15.5 M</p>
99
+ </div>
100
+ </div>
101
+ </div>
102
+ </div>
103
+ </main>
104
+
105
+ <footer class="w-full max-w-5xl mx-auto text-center text-slate-500 py-8 mt-4">
106
+ <p>Made with ❤️ by Ayush</p>
107
+ </footer>
108
+
109
<script>
    // Cached DOM references for the editor, button, and results areas.
    const codeInput = document.getElementById('code-input');
    const predictBtn = document.getElementById('predict-btn');
    const suggestionsDiv = document.getElementById('suggestions');
    const placeholderText = document.getElementById('placeholder-text');
    const btnText = document.getElementById('btn-text');
    const spinner = document.getElementById('spinner');

    let debounceTimer;

    // POST the current snippet to /predict and render the returned
    // suggestions; toggles the button's spinner while the request is
    // in flight.
    const getPredictions = async () => {
        const code = codeInput.value;
        if (code.trim() === '') {
            // Restore the placeholder when the editor is emptied.
            suggestionsDiv.innerHTML = '<p id="placeholder-text" class="text-slate-500 p-2">Suggestions will appear here...</p>';
            return;
        }

        // Show loading state and prevent double submits.
        btnText.classList.add('hidden');
        spinner.classList.remove('hidden');
        predictBtn.disabled = true;

        try {
            const response = await fetch('/predict', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ code: code }),
            });

            if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);
            const data = await response.json();

            // Server-side failures arrive as {error: "..."} in the body.
            if (data.error) {
                suggestionsDiv.innerHTML = `<p class="text-red-400 p-2">${data.error}</p>`;
                return;
            }

            if (data.suggestions && data.suggestions.length > 0) {
                suggestionsDiv.innerHTML = '';
                data.suggestions.forEach(suggestion => {
                    const p = document.createElement('p');
                    p.textContent = suggestion;
                    p.className = 'suggestion-item p-2 rounded hover:bg-slate-700 cursor-pointer text-slate-300';
                    // Clicking a suggestion appends it to the snippet and
                    // immediately requests the next round of predictions.
                    p.onclick = () => {
                        const lastCharIsSpace = codeInput.value.slice(-1) === ' ';
                        codeInput.value += (lastCharIsSpace ? '' : ' ') + suggestion;
                        codeInput.focus();
                        getPredictions();
                    };
                    suggestionsDiv.appendChild(p);
                });
            } else {
                suggestionsDiv.innerHTML = '<p class="text-slate-500 p-2">No suggestions found.</p>';
            }

        } catch (error) {
            console.error('Error:', error);
            suggestionsDiv.innerHTML = '<p class="text-red-400 p-2">An error occurred. Check server logs.</p>';
        } finally {
            // Always restore the button, success or failure.
            btnText.classList.remove('hidden');
            spinner.classList.add('hidden');
            predictBtn.disabled = false;
        }
    };

    predictBtn.addEventListener('click', getPredictions);

    // Re-run predictions 500ms after the user stops typing.
    codeInput.addEventListener('input', () => {
        clearTimeout(debounceTimer);
        debounceTimer = setTimeout(getPredictions, 500); // 500ms debounce
    });

</script>
181
+ </body>
182
+ </html>
183
+
vocab.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c19847861d949bbb046b890ac5fe8b0b11117eeeca46801ca82815ae3f071dcf
3
+ size 131947