"""Minimal Streamlit chat UI backed by the OpenChat 3.5 causal language model."""

import os

# Point every home/cache directory at /tmp. This is done before the
# third-party imports below so that those libraries see the values when they
# initialise. NOTE(review): presumably the default home directory is not
# writable in the deployment environment -- confirm.
os.environ['HOME'] = '/tmp'
os.environ['XDG_CACHE_HOME'] = '/tmp/.cache'
os.environ['HF_HOME'] = '/tmp/.hf'
# NOTE(review): recent transformers releases prefer HF_HOME and may warn about
# TRANSFORMERS_CACHE; it is kept for compatibility with older releases.
os.environ['TRANSFORMERS_CACHE'] = '/tmp/.hf/transformers'
os.environ['STREAMLIT_HOME'] = '/tmp/.hf/streamlit'

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


@st.cache_resource
def load_model():
    """Load the OpenChat tokenizer and model.

    Decorated with ``st.cache_resource`` so the load is not repeated on every
    Streamlit script rerun.

    Returns:
        A ``(tokenizer, model)`` tuple; the model weights are loaded as
        ``torch.float16``.
    """
    model_name = "openchat/openchat-3.5-0106"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float16
    )
    return tokenizer, model


tokenizer, model = load_model()


def _generate_reply(user_text):
    """Generate the assistant's answer to a single user message.

    Args:
        user_text: The message typed by the user.

    Returns:
        The newly generated text only, without the prompt, stripped of
        surrounding whitespace.
    """
    # Wrap the message in the model's own chat format; feeding raw text to a
    # chat-tuned model skips the role markers it was trained on.
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": user_text}],
        tokenize=False,
        add_generation_prompt=True,
    )
    # The rendered template already carries its special tokens, so do not let
    # the tokenizer add them a second time.
    inputs = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=True,
        top_p=0.95,
        top_k=50,
    )
    # generate() returns prompt + continuation for decoder-only models; drop
    # the prompt tokens so the reply does not echo the user's message.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


st.title("OpenChat 🤖")

# Seed the conversation with a greeting on the first run of the session.
if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "Salut ! Pose-moi une question."}
    ]

# Streamlit reruns the script on each interaction, so replay the stored
# conversation every time.
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

query = st.chat_input("Votre message...")

if query:
    st.session_state.messages.append({"role": "user", "content": query})
    with st.chat_message("user"):
        st.markdown(query)

    response = _generate_reply(query)

    st.session_state.messages.append({"role": "assistant", "content": response})
    with st.chat_message("assistant"):
        st.markdown(response)