"""Streamlit chat interface around a LLaMA instruction-tuned model.

Loads the tokenizer/model once (cached), keeps the conversation in
``st.session_state``, and renders it as markdown.
"""
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import nltk

# Download the necessary NLTK data.
# NOTE(review): nltk is never used below — retained from the original;
# consider removing once confirmed unused elsewhere.
nltk.download('punkt')

# Constants
# LLaMA is a decoder-only (causal) model, hence AutoModelForCausalLM below.
MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"
MAX_LENGTH = 512              # max prompt tokens fed to the model
RESPONSE_MAX_LENGTH = 50      # max NEW tokens generated per reply
RESPONSE_MIN_LENGTH = 20      # min NEW tokens generated per reply
LENGTH_PENALTY = 1.0
NUM_BEAMS = 2
NO_REPEAT_NGRAM_SIZE = 2
TEMPERATURE = 0.9
TOP_K = 30
TOP_P = 0.85


# Load Pre-Trained Model and Tokenizer
@st.cache_resource
def load_model():
    """Load and cache the pre-trained tokenizer and model.

    Returns:
        tuple: (tokenizer, model) for MODEL_NAME.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Fix: Llama is a causal LM; AutoModelForSeq2SeqLM raises a ValueError
    # for LlamaConfig (it is not an encoder-decoder architecture).
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    return tokenizer, model


# Function to generate a response using the model
def generate_response(text, tokenizer, model):
    """Generate a model response for *text*.

    Args:
        text: The user's prompt string.
        tokenizer: Tokenizer returned by load_model().
        model: Causal LM returned by load_model().

    Returns:
        str: The generated reply (prompt tokens stripped).
    """
    input_ids = tokenizer.encode(
        text, return_tensors="pt", max_length=MAX_LENGTH, truncation=True
    )
    response_ids = model.generate(
        input_ids=input_ids,
        # Fix: use *_new_tokens — with a causal LM, max_length counts the
        # prompt too, so a prompt longer than 50 tokens produced nothing.
        max_new_tokens=RESPONSE_MAX_LENGTH,
        min_new_tokens=RESPONSE_MIN_LENGTH,
        length_penalty=LENGTH_PENALTY,
        num_beams=NUM_BEAMS,
        no_repeat_ngram_size=NO_REPEAT_NGRAM_SIZE,
        temperature=TEMPERATURE,
        top_k=TOP_K,
        top_p=TOP_P,
        do_sample=True,
    )
    # Fix: causal generate() output includes the prompt; slice it off so the
    # reply does not echo the user's input.
    new_tokens = response_ids[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


# Function to format messages for display
def format_messages_for_display(messages):
    """Render a list of {'role', 'content'} dicts as markdown.

    Args:
        messages: Chat history; each item has 'role' ('user'/'assistant')
            and 'content'.

    Returns:
        str: Markdown with one bolded speaker label per message, separated
        by blank lines.
    """
    formatted_text = []
    for message in messages:
        if message["role"] == "assistant":
            formatted_text.append(f"**Assistant**: {message['content']}")
        else:
            formatted_text.append(f"**User**: {message['content']}")
    return "\n\n".join(formatted_text)


# Main function to run the Streamlit app
def main():
    """Run the Streamlit app."""
    st.set_page_config(
        page_title="LLaMA Chat Interface", page_icon="", layout="wide"
    )
    st.title("LLaMA Chat Interface")
    # Fix: the original string literal was broken across a line break,
    # which is a syntax error; rejoined into one literal.
    st.write(
        "This is a chat interface using the LLaMA model for generating "
        "responses. Enter a prompt below to start chatting with the model."
    )

    # Load the model and tokenizer (cached across reruns).
    tokenizer, model = load_model()

    if 'messages' not in st.session_state:
        st.session_state['messages'] = []

    # Display chat messages
    chat_placeholder = st.empty()
    with chat_placeholder.container():
        st.markdown(format_messages_for_display(st.session_state['messages']))

    # Add text input and send button
    user_input = st.text_input("Enter your prompt:", key="user_input")
    if st.button("Send") and user_input.strip():
        # Store user's message
        st.session_state['messages'].append(
            {"role": "user", "content": user_input}
        )
        # Generate and store the assistant's response
        with st.spinner("Generating response..."):
            response = generate_response(user_input, tokenizer, model)
        st.session_state['messages'].append(
            {"role": "assistant", "content": response}
        )
        # Update chat display
        with chat_placeholder.container():
            st.markdown(
                format_messages_for_display(st.session_state['messages'])
            )

    # Option to clear the chat history
    if st.button("Clear Chat"):
        st.session_state['messages'] = []
        with chat_placeholder.container():
            st.markdown("")


if __name__ == '__main__':
    main()