Spaces:
Sleeping
Sleeping
# pip install transformers streamlit
from transformers import AutoModelForCausalLM, AutoTokenizer
import streamlit as st

# Small instruct model so the app can run on CPU.
checkpoint = "HuggingFaceTB/SmolLM-135M-Instruct"
device = "cpu"  # use "cuda" for GPU or "cpu" for CPU

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# For multiple GPUs install accelerate and do:
# model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

st.title("Dexy Chat Assistant")

# Initialize session state for chat history (persists across reruns).
if 'messages' not in st.session_state:
    st.session_state.messages = []

# Text inputs for user name and message.
user_name = st.text_input("Your name please?: ", key="user_name")
user_input = st.text_input("Enter your message:", key="user_input")

if st.button("Send"):
    if user_input:
        # Add user message to history before building the prompt so the
        # template includes the current turn.
        st.session_state.messages.append({"role": "user", "content": user_input})

        # Build the chat prompt; add_generation_prompt=True appends the
        # assistant header so the model continues as the assistant.
        input_text = tokenizer.apply_chat_template(
            st.session_state.messages, tokenize=False, add_generation_prompt=True
        )
        encoded = tokenizer(input_text, return_tensors="pt", padding=True)
        inputs = encoded.input_ids.to(device)
        attention_mask = encoded.attention_mask.to(device)
        outputs = model.generate(
            inputs,
            attention_mask=attention_mask,
            max_new_tokens=50,
            temperature=0.2,
            top_p=0.9,
            do_sample=True,
        )

        # Decode ONLY the newly generated tokens (everything after the
        # prompt) and strip special tokens. Storing the full decoded
        # sequence would feed raw chat-template markup back into the
        # model on the next turn and corrupt the conversation history.
        response = tokenizer.decode(
            outputs[0][inputs.shape[-1]:], skip_special_tokens=True
        ).strip()

        # Add assistant's response to history (clean text, no markup).
        st.session_state.messages.append({"role": "assistant", "content": response})

# Display full chat history.
for msg in st.session_state.messages:
    if msg["role"] == "user":
        st.write(f"{user_name}: {msg['content']}")
    else:
        # History is stored clean, so no template-token surgery is needed.
        st.write(f"Dexy: {msg['content']}")