Spaces:

QUT-GenAILab
/

next-word-predictor

Running

App Files Files Community

willsh1997 committed on Jun 19, 2025

Commit

6ddb051

1 Parent(s): 240ea59

:sparkles: initial commit - token prediction app

Browse files

Files changed (3) hide show

README.md +14 -2
next_word_predictor.py +183 -0
requirements.txt +87 -0

README.md CHANGED Viewed

@@ -1,2 +1,14 @@
-# widget-token-predictor
-widget for demonstrating next token prediction in GPT2

+---
+title: Next Word Predictor
+emoji: 🏆
+colorFrom: red
+colorTo: green
+sdk: gradio
+sdk_version: 5.23.3
+app_file: next_word_predictor.py
+pinned: false
+license: apache-2.0
+short_description: shows GPT-2 next-word predictions for typed text
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

next_word_predictor.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import gradio as gr
+import torch
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
+import torch.nn.functional as F
+import spaces
+class NextWordPredictor:
+    def __init__(self):
+        # Load pre-trained GPT-2 model and tokenizer
+        self.model_name = "gpt2"
+        self.tokenizer = GPT2Tokenizer.from_pretrained(self.model_name)
+        self.model = GPT2LMHeadModel.from_pretrained(self.model_name)
+        # Set padding token
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+        # Set model to evaluation mode
+        self.model.eval()
+    @spaces.GPU
+    def predict_next_words(self, text, top_k=10):
+        """
+        Predict the next word given input text
+        Returns top_k most likely words with their probabilities and suggested words
+        """
+        text = text.strip()
+        if not text:
+            return [], []
+        # Tokenize input text
+        inputs = self.tokenizer.encode(text, return_tensors='pt')
+        # Get model predictions
+        with torch.no_grad():
+            outputs = self.model(inputs)
+            predictions = outputs.logits[0, -1, :]  # Get last token predictions
+        # Apply softmax to get probabilities
+        probabilities = F.softmax(predictions, dim=-1)
+        # Get top k predictions
+        top_k_probs, top_k_indices = torch.topk(probabilities, top_k)
+        # Convert to readable format with aligned progress bars
+        results = []
+        suggested_words = []
+        # Find the longest word for alignment
+        words_with_probs = []
+        for prob, idx in zip(top_k_probs, top_k_indices):
+            word = self.tokenizer.decode(idx.item()).strip()
+            probability = prob.item()
+            percentage = probability * 100
+            words_with_probs.append((word, probability, percentage))
+        # Find max word length for alignment
+        max_word_length = max(len(word) for word, _, _ in words_with_probs)
+        for word, probability, percentage in words_with_probs:
+            # Create aligned progress bar with better blocks
+            bar_length = 20
+            filled_length = int(bar_length * probability)
+            bar = '█' * filled_length + '▢' * (bar_length - filled_length)
+            # Align everything properly
+            word_padded = word.ljust(max_word_length)
+            result = f"{word_padded} | {probability:.4f} ({percentage:5.2f}%) {bar}"
+            results.append(result)
+            suggested_words.append(word)
+        return results, suggested_words
+# Initialize the predictor once at import time; update_predictions below
+# reads this module-level instance on every call.
+predictor = NextWordPredictor()
+def update_predictions(text):
+    """Update predictions based on current text"""
+    predictions_list, suggested_words = predictor.predict_next_words(text)
+    if not predictions_list:
+        return [gr.update(visible=False)] * 10
+    # Update buttons with predictions, hide unused ones
+    updates = []
+    for i in range(10):
+        if i < len(predictions_list):
+            updates.append(gr.update(value=predictions_list[i], visible=True))
+        else:
+            updates.append(gr.update(visible=False))
+    return updates
+def add_word_to_text(current_text, button_value):
+    """Extract word from button and add to text"""
+    if not button_value:
+        return current_text
+    # Extract the word (everything before the first "|")
+    word = button_value.split(" | ")[0].strip()
+    if not current_text.strip():
+        return word
+    # Add space if text doesn't end with space
+    if current_text.endswith(' '):
+        return current_text + word
+    else:
+        return current_text + ' ' + word
+# Create Gradio interface: a text box on the left, ten prediction buttons on
+# the right, example prompts and a short explanation underneath.
+with gr.Blocks(title="Next Word Predictor", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# Next Word Predictor")
+    gr.Markdown("Type a sentence and see the top 10 most likely next words with their probabilities! **Click on any prediction to add that word to your text.**")
+    with gr.Row():
+        # Left column (scale=2): the editable prompt
+        with gr.Column(scale=2):
+            text_input = gr.Textbox(
+                label="Enter your text",
+                placeholder="Start typing a sentence...",
+                lines=4,
+                interactive=True
+            )
+        # Right column (scale=1): the prediction buttons
+        with gr.Column(scale=1):
+            gr.Markdown("### Top 10 Next Word Predictions")
+            gr.Markdown("*Click any prediction below to add it to your text*")
+            # Create 10 clickable buttons for predictions; they start hidden
+            # and are shown/labelled by update_predictions. The count must
+            # match the ten updates that update_predictions returns.
+            prediction_buttons = []
+            for i in range(10):
+                btn = gr.Button(
+                    value="",
+                    visible=False,
+                    variant="secondary",
+                    size="sm"
+                )
+                prediction_buttons.append(btn)
+    # Update predictions as user types
+    text_input.change(
+        fn=update_predictions,
+        inputs=text_input,
+        outputs=prediction_buttons
+    )
+    # Add click handlers for each prediction button: first append the word
+    # parsed from the button label, then refresh the predictions.
+    # NOTE(review): text_input.change is registered above as well; if Gradio
+    # fires change for values set by a handler, the .then() step below runs
+    # the prediction a second time per click — confirm and drop if redundant.
+    for btn in prediction_buttons:
+        btn.click(
+            fn=add_word_to_text,
+            inputs=[text_input, btn],
+            outputs=text_input
+        ).then(
+            fn=update_predictions,
+            inputs=text_input,
+            outputs=prediction_buttons
+        )
+    # Examples: sample prompts that fill text_input
+    gr.Examples(
+        examples=[
+            ["The weather today is"],
+            ["I love to eat"],
+            ["Machine learning is"],
+            ["The quick brown fox"],
+            ["In the future, we will"]
+        ],
+        inputs=text_input
+    )
+    gr.Markdown("### How it works:")
+    gr.Markdown("""
+    - Uses GPT-2 language model to predict next words
+    - Applies softmax to convert logits to probabilities
+    - Shows top 10 most likely words with percentages and aligned visual bars
+    - Updates predictions in real-time as you type
+    - **Click on any prediction button to add that word to your text automatically**
+    - Progress bars show relative probability: █ = filled, ▢ = empty outline
+    - All bars are perfectly aligned for easy comparison
+    """)
+# Launch the app only when this file is executed as a script, so importing
+# the module does not start the server.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,87 @@

+accelerate==1.4.0
+aiofiles==23.2.1
+annotated-types==0.7.0
+anyio==4.8.0
+asttokens==3.0.0
+bitsandbytes==0.45.4
+certifi==2025.1.31
+charset-normalizer==3.4.1
+click==8.1.8
+comm==0.2.2
+debugpy==1.8.12
+decorator==5.1.1
+exceptiongroup==1.2.2
+executing==2.2.0
+fastapi==0.115.8
+ffmpy==0.5.0
+filelock==3.17.0
+fsspec==2025.2.0
+gradio==5.16.1
+gradio_client==1.7.0
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.28.1
+idna==3.10
+ipykernel==6.29.5
+ipython==8.32.0
+jedi==0.19.2
+Jinja2==3.1.5
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mpmath==1.3.0
+nest-asyncio==1.6.0
+networkx==3.4.2
+numpy==2.2.3
+orjson==3.10.15
+packaging==24.2
+pandas==2.2.3
+parso==0.8.4
+pexpect==4.9.0
+pillow==11.1.0
+platformdirs==4.3.6
+prompt_toolkit==3.0.50
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pydantic==2.10.6
+pydantic_core==2.27.2
+pydub==0.25.1
+Pygments==2.19.1
+python-dateutil==2.9.0.post0
+python-multipart==0.0.20
+pytz==2025.1
+PyYAML==6.0.2
+pyzmq==26.2.1
+regex==2024.11.6
+requests==2.32.3
+rich==13.9.4
+ruff==0.9.6
+safehttpx==0.1.6
+safetensors==0.5.2
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+stack-data==0.6.3
+starlette==0.45.3
+sympy==1.13.1
+tokenizers==0.21.0
+tomlkit==0.13.2
+torch==2.4.0
+tornado==6.4.2
+tqdm==4.67.1
+traitlets==5.14.3
+transformers==4.49.0
+typer==0.15.1
+typing_extensions==4.12.2
+tzdata==2025.1
+urllib3==2.3.0
+uvicorn==0.34.0
+wcwidth==0.2.13
+websockets==14.2