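"""Streamlit chat UI for the Phi2 GPro model: microsoft/phi-2 with a PEFT
adapter loaded from the local "sft-model" directory.

Run locally (assuming this file is saved as app.py) with:
    streamlit run app.py
"""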
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Set page config
st.set_page_config(
    page_title="Phi2 GPro Chat",
    page_icon="π",
    layout="wide"
)

# Initialize session state for chat history if it doesn't exist
if "messages" not in st.session_state:
    st.session_state.messages = []
@st.cache_resource  # cache across Streamlit reruns so the model loads only once
def load_model():
    from peft import PeftModel

    # Load base model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
    base_model = AutoModelForCausalLM.from_pretrained(
        "microsoft/phi-2",
        torch_dtype=torch.float16,
        device_map="auto",
        low_cpu_mem_usage=True
    )

    # Load and apply the fine-tuned adapter weights
    model = PeftModel.from_pretrained(base_model, "sft-model")
    model.eval()  # inference mode
    return model, tokenizer
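# Optional sketch (not enabled here): if latency matters more than memory,
# the adapter weights can be folded into the base model inside load_model()
# before returning, using PeftModel's merge_and_unload():
#     model = model.merge_and_unload()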
def generate_response(prompt, model, tokenizer, max_length=512):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_length)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )

    # Slice off the prompt tokens before decoding so the reply contains only
    # newly generated text (slicing the decoded string by character length can
    # misalign when the tokenizer normalizes whitespace on decode).
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response.strip()
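# Optional sketch (assumptions: transformers' TextIteratorStreamer and
# Streamlit's st.write_stream are available in the installed versions).
# Streaming the reply token by token instead of waiting for the full text:
#
#     from threading import Thread
#     from transformers import TextIteratorStreamer
#
#     def stream_response(prompt, model, tokenizer):
#         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
#         streamer = TextIteratorStreamer(
#             tokenizer, skip_prompt=True, skip_special_tokens=True
#         )
#         Thread(target=model.generate,
#                kwargs=dict(**inputs, max_new_tokens=512, streamer=streamer)).start()
#         return streamer  # iterable of text chunks; pass to st.write_stream()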
# Main app
st.title("Phi2 GPro Chat π")

# Load model
try:
    model, tokenizer = load_model()
    st.success("Model loaded successfully! Ready to chat.")
except Exception as e:
    st.error(f"Error loading model: {str(e)}")
    st.stop()

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])
# Chat input
if prompt := st.chat_input("What would you like to discuss?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    # Generate response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            # Prepare conversation history
            conversation = ""
            for msg in st.session_state.messages:
                if msg["role"] == "user":
                    conversation += f"User: {msg['content']}\n"
                else:
                    conversation += f"Assistant: {msg['content']}\n"
            # Cue the model to answer in the assistant role, matching the
            # User:/Assistant: format used above
            conversation += "Assistant:"
            response = generate_response(conversation, model, tokenizer)
            st.write(response)
            st.session_state.messages.append({"role": "assistant", "content": response})
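# Note: tokenizer truncation in generate_response() keeps the *start* of the
# prompt, so a very long chat can silently drop its newest turns. A simple
# hedge (assumption: the last 10 messages carry enough context) is to window
# the history when building the prompt:
#     for msg in st.session_state.messages[-10:]:
#         ...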
# Add a sidebar with information
with st.sidebar:
    st.title("About")
    st.markdown("""
    This is a chatbot powered by the Phi2 GPro model.
    Feel free to ask questions and engage in conversation!

    **Features:**
    - Contextual responses
    - Memory of conversation
    - Fast inference
    """)