"""Streamlit playground for interactive text generation with DistilGPT-2."""

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# The Hugging Face cache directory is set in the Dockerfile
# (TRANSFORMERS_CACHE), so it is intentionally not configured here.

st.title("💬 DistilGPT-2 Playground")


@st.cache_resource
def load_model_and_tokenizer(model_id):
    """Load the tokenizer and causal-LM weights for *model_id*.

    Decorated with ``st.cache_resource`` so the (slow) download/load happens
    once per server process instead of on every Streamlit rerun.

    Args:
        model_id: Hugging Face model identifier, e.g. ``"distilgpt2"``.

    Returns:
        ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="cpu",           # CPU-only deployment target
        torch_dtype=torch.float32,  # full precision; fp16 gives no CPU benefit
        low_cpu_mem_usage=True,
    )
    return tokenizer, model


model_id = "distilgpt2"
with st.spinner(f"Loading model {model_id}... This may take a while."):
    tokenizer, model = load_model_and_tokenizer(model_id)

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

prompt = st.text_area("Enter your prompt here:")

if prompt:
    with st.spinner("Generating text..."):
        # FIX: the original passed temperature=0.7 without do_sample=True;
        # generation is greedy by default, so the temperature was silently
        # ignored. max_new_tokens replaces the deprecated max_length, which
        # counted the prompt tokens and could leave no room for output on
        # long prompts. pad_token_id is set explicitly because GPT-2 has no
        # pad token, which otherwise triggers a warning on every call.
        output = generator(
            prompt,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )
    st.write("### Output:")
    st.success(output[0]["generated_text"])