import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Set page config
st.set_page_config(
    page_title="Phi2 GPro Chat",
    page_icon="🚀",
    layout="wide",
)

# Initialize session state for chat history if it doesn't exist
if "messages" not in st.session_state:
    st.session_state.messages = []


@st.cache_resource
def load_model():
    """Load the Phi-2 base model, apply the "sft-model" adapter, and return both.

    Decorated with ``st.cache_resource`` so the returned objects are reused
    instead of being rebuilt on every script run.

    Returns:
        A ``(model, tokenizer)`` tuple: the PEFT-wrapped causal LM and the
        tokenizer loaded from ``microsoft/phi-2``.
    """
    # peft is imported here rather than at module level, so it is only
    # required once the model is actually loaded.
    from peft import PeftModel

    # Load base model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
    base_model = AutoModelForCausalLM.from_pretrained(
        "microsoft/phi-2",
        torch_dtype=torch.float16,
        device_map="auto",
        low_cpu_mem_usage=True,
    )

    # Load and apply adapter weights
    model = PeftModel.from_pretrained(base_model, "sft-model")
    return model, tokenizer


def generate_response(prompt, model, tokenizer, max_length=512, max_new_tokens=512):
    """Sample a continuation of ``prompt`` and return only the newly generated text.

    Args:
        prompt: Text to continue (here, the formatted chat transcript).
        model: Causal LM exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        max_length: Maximum number of prompt tokens fed to the model. When the
            prompt is longer, the *oldest* tokens are dropped so that the most
            recent turns of the conversation are always kept.
        max_new_tokens: Maximum number of tokens to generate.

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped. The prompt itself is not included.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    # Truncate from the left: the tokenizer's built-in truncation cuts the end
    # of the text by default, which for a chat transcript would discard the
    # latest user message. Encoded tensors are (batch, seq), so slice axis 1.
    inputs = {
        name: tensor[:, -max_length:].to(model.device)
        for name, tensor in encoded.items()
    }
    prompt_token_count = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The output sequence starts with the prompt tokens; drop them by token
    # count. Slicing the decoded string by the decoded prompt's length is
    # unreliable because decoding does not always reproduce the text exactly.
    generated_tokens = outputs[0][prompt_token_count:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()


def _format_conversation(messages):
    """Render chat messages as "User: ..." / "Assistant: ..." lines, one per message."""
    return "".join(
        f"{'User' if msg['role'] == 'user' else 'Assistant'}: {msg['content']}\n"
        for msg in messages
    )


# Main app
st.title("Phi2 GPro Chat 🚀")

# Load model
try:
    model, tokenizer = load_model()
    st.success("Model loaded successfully! Ready to chat.")
except Exception as e:
    # Top-level boundary: report the failure in the UI and halt this run.
    st.error(f"Error loading model: {e}")
    st.stop()

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

# Chat input
if prompt := st.chat_input("What would you like to discuss?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    # Generate response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            # Prepare conversation history (includes the message just added)
            conversation = _format_conversation(st.session_state.messages)
            response = generate_response(conversation, model, tokenizer)
            st.write(response)
    st.session_state.messages.append({"role": "assistant", "content": response})

# Add a sidebar with information
with st.sidebar:
    st.title("About")
    # Explicit newlines so the markdown bullet list renders as a list.
    st.markdown(
        "This is a chatbot powered by the Phi2 GPro model.\n"
        "Feel free to ask questions and engage in conversation!\n"
        "\n"
        "**Features:**\n"
        "- Contextual responses\n"
        "- Memory of conversation\n"
        "- Fast inference\n"
    )