Spaces:
Sleeping
Sleeping
File size: 3,705 Bytes
99e51c6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
"β
Model {model_name} loaded successfully on {DEVICE_STR}!")
return tokenizer, model
except ValueError as e:
if "Unrecognized configuration class" in str(e):
progress_placeholder.error(f"β Error: {model_name} is not a causal language model suitable for text generation. Please select a different model.")
st.error(f"Technical details: {str(e)}")
else:
progress_placeholder.error(f"β Error loading model: {str(e)}")
raise e
except Exception as e:
progress_placeholder.error(f"β Unexpected error loading model: {str(e)}")
raise e
# Eagerly load the selected model at module scope so the UI below can use it.
# NOTE(review): load_model appears to take the model name and return a
# (tokenizer, model) pair -- its def is above this chunk; confirm there.
tokenizer, model = load_model(MODEL_NAME)
def generate_text(prompt, max_new_tokens=150, temperature=0.7, top_p=0.9):
    """Run sampled generation on the module-level model and return decoded text.

    Args:
        prompt: Text to condition the generation on.
        max_new_tokens: Upper bound on tokens produced beyond the prompt.
        temperature: Softmax temperature for sampling (higher = more random).
        top_p: Nucleus-sampling cumulative-probability cutoff.

    Returns:
        The full decoded sequence (prompt plus continuation) with special
        tokens stripped.
    """
    # Tokenize, then move every input tensor onto the configured device.
    encoded = tokenizer(prompt, return_tensors="pt")
    encoded = {name: tensor.to(DEVICE) for name, tensor in encoded.items()}
    # Inference only -- no gradient tracking needed.
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            # Explicit pad/eos ids avoid warnings on models with no pad token.
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
# ---------- Streamlit UI ----------
# Page header; DEVICE_STR (defined above this chunk) names the compute device.
st.title(f"Language Model Text Generator ({DEVICE_STR.upper()})")
st.caption("Choose from various pre-trained language models for text generation")
# Prompt input; the default value seeds a generic text-continuation task.
prompt = st.text_area(
"Enter prompt (English or other supported languages depending on model)",
value="The future of artificial intelligence is",
height=150,
)
# Sampling controls -- slider args are (label, min, max, default).
max_new_tokens = st.slider("Max output tokens", 32, 512, 150)
temperature = st.slider("Temperature", 0.1, 1.2, 0.7)
top_p = st.slider("Top-p (nucleus sampling)", 0.1, 1.0, 0.9)
# ---------- Generation trigger ----------
if st.button("Generate"):
    # Progress UI lives in its own container so it can be cleared afterwards.
    progress_container = st.container()
    with progress_container:
        progress_bar = st.progress(0)
        status_text = st.empty()
    try:
        # Status emoji reconstructed from mojibake in the source -- verify
        # against the original file.
        status_text.text("🔄 Preparing input...")
        progress_bar.progress(25)
        status_text.text("🤖 Generating text... (this may take 20-40s on CPU)")
        progress_bar.progress(50)
        output = generate_text(prompt, max_new_tokens, temperature, top_p)
        progress_bar.progress(100)
        status_text.text("✅ Generation complete!")
        # Clear progress indicators after a short delay so the user sees the
        # completed state before it disappears.
        import time  # local import; consider hoisting to the file's import block

        time.sleep(1)
        progress_bar.empty()
        status_text.empty()
        st.subheader("Model output:")
        st.write(output)
    except Exception as e:
        # Tear down the progress widgets before surfacing the error.
        progress_bar.empty()
        status_text.empty()
        st.error(f"❌ Generation failed: {e}")
st.markdown("---")
# ---------- Model Status ----------
# Header emoji reconstructed from mojibake in the source -- verify against
# the original file.
st.subheader("📊 Model Status")
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Current Model", MODEL_NAME)
with col2:
    st.metric("Device", DEVICE_STR.upper())
with col3:
    # The parameter count doubles as a "model is loaded" probe: if the model
    # object is not usable yet, fall back to a placeholder metric.
    try:
        model_params = sum(p.numel() for p in model.parameters())
        st.metric("Model Parameters", f"{model_params:,}")
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt and
    # Streamlit's own control-flow exceptions are not swallowed.
    except Exception:
        st.metric("Model Parameters", "Loading...")
st.markdown("---")
# Usage tips rendered as a markdown footer.
st.markdown(
    """
**Tips**
- First run will download model to `~/.cache/huggingface`.
- DialoGPT models work well for conversational text.
- GPT-2/DistilGPT-2 work best with English prompts.
- Use smaller models (DialoGPT-small, DistilGPT-2) for faster CPU response.
"""
)