Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
@st.cache_resource
def load_model_and_tokenizer():
    """
    Load the tokenizer and causal-LM model, cached across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction;
    ``st.cache_resource`` keeps the loaded objects alive so the weights are
    only loaded once per server process instead of on every rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        Exception: Any error raised while loading is shown with ``st.error``
            and then re-raised to the caller.
    """
    model_id = "namannn/llama2-13b-hyperbolic-cluster-pruned"
    try:
        # NOTE: trust_remote_code=True allows code shipped in the model
        # repository to run locally; keep only while that repo is trusted.
        tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            use_fast=True,  # Use fast tokenizer if available
            trust_remote_code=True,  # Trust remote code for custom tokenizers
        )
        # Generation needs a pad token; fall back to EOS when none is defined.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            trust_remote_code=True,  # Trust remote code for custom models
        )
        return tokenizer, model
    except Exception as e:
        st.error(f"Error loading model: {e}")
        raise
def generate_text(prompt, tokenizer, model, max_length):
    """
    Generate a continuation of ``prompt`` and return it without the prompt.

    Args:
        prompt: Text the generation is conditioned on.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        model: Model exposing ``device`` and ``generate``.
        max_length: Maximum number of newly generated tokens (the prompt
            tokens are not counted against this limit).

    Returns:
        The generated text with surrounding whitespace stripped, or ``None``
        if any step failed (the error is reported with ``st.error``).
    """
    try:
        # Move the encoded prompt to the same device as the model.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        input_ids = inputs["input_ids"]

        with torch.no_grad():  # Inference only; no gradients needed
            outputs = model.generate(
                input_ids=input_ids,
                attention_mask=inputs.get("attention_mask"),
                # Bound only the continuation; equivalent to the former
                # ``max_length = prompt_length + max_length`` arithmetic.
                max_new_tokens=max_length,
                num_return_sequences=1,
                no_repeat_ngram_size=2,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.7,
                pad_token_id=tokenizer.eos_token_id,
            )

        # The output sequence starts with the prompt tokens; drop them so
        # only the newly generated part is decoded.
        prompt_token_count = len(input_ids[0])
        generated_text = tokenizer.decode(
            outputs[0][prompt_token_count:], skip_special_tokens=True
        )
        return generated_text.strip()
    except Exception as e:
        st.error(f"Error generating text: {e}")
        return None
def main():
    """
    Render the Streamlit page.

    Configures the page, loads the tokenizer and model, shows device/dtype
    details in the sidebar, collects a prompt and a length setting, and on
    button press displays the generated text or a warning/error message.
    """
    st.set_page_config(page_title="LLaMa2 Text Generation", page_icon="✍️")

    st.title("Text Generation with LLaMa2-13b Hyperbolic Model")
    st.write("Enter a prompt below and the model will generate text.")

    # Without a model nothing else on the page is usable, so stop early.
    try:
        tokenizer, model = load_model_and_tokenizer()
    except Exception as load_error:
        st.error(f"Failed to load model: {load_error}")
        return

    sidebar = st.sidebar
    sidebar.header("System Information")
    sidebar.write(f"Device: {model.device}")
    sidebar.write(f"Model Dtype: {model.dtype}")

    prompt = st.text_area("Input Prompt", "Once upon a time, in a land far away")
    max_length = st.slider(
        "Max Length of Generated Text",
        min_value=50,
        max_value=500,
        value=150,
    )

    # Nothing to do until the user asks for a generation.
    if not st.button("Generate Text"):
        return

    if not prompt:
        st.warning("Please enter a prompt to generate text.")
        return

    try:
        generated_text = generate_text(prompt, tokenizer, model, max_length)
        if not generated_text:
            st.warning("No text was generated. Please check the input and try again.")
        else:
            st.subheader("Generated Text:")
            st.write(generated_text)
    except Exception as generation_error:
        st.error(f"Unexpected error during text generation: {generation_error}")
# Run the app only when this file is executed as the entry-point script.
if __name__ == "__main__":
    main()