import gradio as gr
from transformers import pipeline
import re
import os

# Google's Gemma checkpoints are gated, so read the Hugging Face access token
# from the HF_TOKEN environment variable (set through the Space secrets).
# The value is None when the variable is not defined.
hf_token = os.getenv("HF_TOKEN")

# Build the Gemma 3 270M text-generation pipeline once, at import time, so
# every request reuses the same loaded model.
generator = pipeline(
    task="text-generation",
    model="google/gemma-3-270m",
    token=hf_token,
    device=-1,  # -1 selects the CPU
)

# Optional leading whitespace followed by one run of letters/digits,
# apostrophes and hyphens. ``[^\W_]`` is ``\w`` minus the underscore, so it
# accepts exactly ``[a-zA-Z0-9]`` on ASCII text while also keeping accented
# and non-Latin letters (an ASCII-only class would cut "café" down to "caf").
_FIRST_WORD_RE = re.compile(r"^\s*((?:[^\W_]|['-])+)")


def _first_word(generated_text):
    """Return the first word of *generated_text*, or "" if it is blank."""
    match = _FIRST_WORD_RE.search(generated_text)
    if match:
        return match.group(1)

    # The text does not start with a word character (e.g. the model emitted
    # punctuation first): fall back to the first whitespace-delimited chunk.
    chunks = generated_text.split()
    return chunks[0] if chunks else ""


def predict_next_word(context) -> str:
    """Predict the single most likely word that follows *context*.

    Args:
        context: The text typed so far. ``None``, an empty string, or a
            whitespace-only string all count as "no context".

    Returns:
        The first word of the model's continuation, or ``""`` when there is
        no context or the model produced only whitespace.
    """
    # Guard against None (e.g. a null payload) as well as blank input, so the
    # model is never invoked without any text to complete.
    if not context or not context.strip():
        return ""

    # Generate a few tokens to ensure a full next word
    result = generator(
        context,
        max_new_tokens=5,
        return_full_text=False,  # only the continuation, not the prompt
        do_sample=False,         # greedy decoding: highest-probability tokens
        pad_token_id=generator.tokenizer.eos_token_id,
    )

    return _first_word(result[0]["generated_text"])

# Input and output widgets shown on the demo page
context_box = gr.Textbox(
    label="Context",
    placeholder="e.g., 'How is it'",
    lines=2,
)
prediction_box = gr.Textbox(label="Most Likely Next Word")

# Wire the predictor into a one-input / one-output Gradio interface
demo = gr.Interface(
    fn=predict_next_word,
    inputs=context_box,
    outputs=prediction_box,
    title="Next Word Predictor (Gemma 3 270M)",
    description="Enter a string of context and the model will instantly return the most likely next word.",
    flagging_mode="never",
)

# Launch the app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()