"""Gradio app that returns the most likely next word for a text prompt.

The prompt is continued by Gemma 3 270M with greedy decoding, and the first
word of the continuation is shown to the user.
"""

import os
import re
import unicodedata
from itertools import takewhile

import gradio as gr
from transformers import pipeline

# Grab the HF token from the Space secrets so we can access Google's gated model.
hf_token = os.environ.get("HF_TOKEN")

# Load Gemma 3 270M. It is extremely fast and capable for text completion.
generator = pipeline(
    "text-generation",
    model="google/gemma-3-270m",
    device=-1,
    token=hf_token,
)

# First run of non-whitespace characters, ignoring any leading whitespace.
_FIRST_TOKEN_RE = re.compile(r"\s*(\S+)")

# Punctuation that may appear inside a word ("don't", "well-known").
_WORD_JOINERS = frozenset("'-")


def _is_word_char(ch):
    """Return True if *ch* can be part of a word.

    A word character is a letter or digit in any script, an apostrophe or
    hyphen, or a combining mark (Unicode category M*). Combining marks are
    accepted so that decomposed accents and scripts that rely on them are
    not cut off mid-word.
    """
    return (
        ch.isalnum()
        or ch in _WORD_JOINERS
        or unicodedata.category(ch).startswith("M")
    )


def _extract_first_word(text):
    """Return the first word of *text*, or "" if *text* is blank.

    Leading whitespace is skipped. The word is the leading run of word
    characters of the first whitespace-delimited token, so trailing
    punctuation is dropped ("you," -> "you"). If the token does not start
    with a word character (e.g. the model produced punctuation first), the
    whole token is returned instead.
    """
    match = _FIRST_TOKEN_RE.match(text)
    if match is None:
        return ""
    token = match.group(1)
    word = "".join(takewhile(_is_word_char, token))
    return word or token


def predict_next_word(context: str) -> str:
    """Predict the single most likely next word following *context*.

    Args:
        context: The text to continue. ``None``, empty and whitespace-only
            values are accepted and yield an empty result without invoking
            the model.

    Returns:
        The first word of the model's continuation, or "" when there is
        no usable context or the model generated only whitespace.
    """
    if not context or not context.strip():
        return ""

    # Generate a few tokens to ensure a full next word; a single token may
    # only be a fragment of one.
    result = generator(
        context,
        max_new_tokens=5,
        return_full_text=False,
        do_sample=False,  # Greedy decoding for absolute highest probability
        pad_token_id=generator.tokenizer.eos_token_id,
    )

    generated_text = result[0]["generated_text"]

    # Cleanly extract just the very first word (ignoring leading spaces).
    return _extract_first_word(generated_text)


# Create the Gradio Interface
demo = gr.Interface(
    fn=predict_next_word,
    inputs=gr.Textbox(lines=2, placeholder="e.g., 'How is it'", label="Context"),
    outputs=gr.Textbox(label="Most Likely Next Word"),
    title="Next Word Predictor (Gemma 3 270M)",
    description="Enter a string of context and the model will instantly return the most likely next word.",
    flagging_mode="never",
)

if __name__ == "__main__":
    demo.launch()