"""Streamlit playground for interactive text generation with DistilGPT-2."""

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# The Hugging Face cache directory is set in the Dockerfile
# (TRANSFORMERS_CACHE), so it is intentionally not configured here.

st.title("💬 DistilGPT-2 Playground")


@st.cache_resource
def load_model_and_tokenizer(model_id):
    """Load the tokenizer and causal-LM weights for *model_id*.

    Decorated with ``st.cache_resource`` so the (slow) download/load happens
    once per server process instead of on every Streamlit rerun.

    Args:
        model_id: Hugging Face model identifier, e.g. ``"distilgpt2"``.

    Returns:
        ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="cpu",           # CPU-only deployment target
        torch_dtype=torch.float32,  # full precision; fp16 gives no CPU benefit
        low_cpu_mem_usage=True,
    )
    return tokenizer, model


model_id = "distilgpt2"
with st.spinner(f"Loading model {model_id}... This may take a while."):
    tokenizer, model = load_model_and_tokenizer(model_id)

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

prompt = st.text_area("Enter your prompt here:")

if prompt:
    with st.spinner("Generating text..."):
        # FIX: the original passed temperature=0.7 without do_sample=True;
        # generation is greedy by default, so the temperature was silently
        # ignored. max_new_tokens replaces the deprecated max_length, which
        # counted the prompt tokens and could leave no room for output on
        # long prompts. pad_token_id is set explicitly because GPT-2 has no
        # pad token, which otherwise triggers a warning on every call.
        output = generator(
            prompt,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )
    st.write("### Output:")
    st.success(output[0]["generated_text"])