Spaces:

Nikhil0987
/

gemma_text

Sleeping

Nikhil0987 committed on Mar 7, 2024

Commit

4161eb3

verified ·

1 Parent(s): 829651d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,32 +1,30 @@
 import streamlit as st
-from transformers import AutoTokenizer, AutoModelForCausalLM
-# Title
-st.title("Text Generation ")
-# NOTE: Large language models are resource-intensive; loading the full
-# checkpoint below may exceed the memory of a standard machine.
-
-# Hugging Face Hub id of the checkpoint used by this demo.
-DEMO_MODEL_ID = "meta-llama/Llama-2-13b-hf"
-# Upper bound on the number of tokens generated per request.
-MAX_NEW_TOKENS = 50
-
-
-@st.cache_resource
-def load_demo_model():
-    """Load and return the demo ``(tokenizer, model)`` pair.
-
-    Decorated with ``st.cache_resource`` so the download/load happens once
-    per server process rather than on every Streamlit rerun.
-    """
-    tokenizer = AutoTokenizer.from_pretrained(DEMO_MODEL_ID)
-    model = AutoModelForCausalLM.from_pretrained(DEMO_MODEL_ID)
-    return tokenizer, model
-
-
-# Text input for prompt
-input_text = st.text_area("Enter a prompt for text generation:")
-# Button to trigger generation
-if st.button("Generate Text"):
-    if not input_text.strip():
-        # Nothing to condition on; avoid loading the model for an empty prompt.
-        st.warning("Please enter a prompt first.")
-    else:
-        with st.spinner("Generating..."):
-            tokenizer, model = load_demo_model()
-            # Previously the model was loaded but never used and a "..."
-            # placeholder was displayed; run the prompt through the model.
-            inputs = tokenizer(input_text, return_tensors="pt")
-            outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
-            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        st.write(generated_text)

 import streamlit as st
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# Hugging Face Hub id of the chat model served by this app.
+model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+
+
+@st.cache_resource
+def _load_model_and_tokenizer(name):
+    """Return ``(tokenizer, model)`` for the Hub checkpoint *name*.
+
+    Streamlit re-executes this script on every widget interaction;
+    ``st.cache_resource`` keeps the loaded objects alive between reruns so
+    the checkpoint is not reloaded each time.
+    """
+    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
+    loaded_model = AutoModelForCausalLM.from_pretrained(name, device_map="auto")
+    return loaded_tokenizer, loaded_model
+
+
+def _render_history(placeholder, history):
+    """Write the conversation in *history* into the *placeholder* element."""
+    if not history:
+        return
+    # Markdown (not st.text) so the ``**role**`` markers render as bold;
+    # a blank line between entries keeps each message in its own paragraph.
+    placeholder.markdown(
+        "\n\n".join(f"**{msg['role']}**: {msg['content']}" for msg in history)
+    )
+
+
+tokenizer, model = _load_model_and_tokenizer(model_id)
+
+st.title("Chat with the Language Model")
+
+# The conversation must outlive a single script run, so it is stored in
+# session state. (It was previously an undefined name -> NameError on Send.)
+if "messages" not in st.session_state:
+    st.session_state["messages"] = []
+messages = st.session_state["messages"]
+
+# Area to display chat history
+chat_history = st.empty()
+# User input box
+user_input = st.text_input("Your message:", key="input")
+
+# Submit button; blank submissions are ignored.
+if st.button('Send') and user_input.strip():
+    # Add user message to chat history and show it while the model runs
+    messages.append({"role": "user", "content": user_input})
+    _render_history(chat_history, messages)
+    # Process the input with the model. Inputs follow the model's device
+    # instead of a hard-coded "cuda", which fails on CPU-only hosts.
+    inputs = tokenizer.apply_chat_template(
+        messages, add_generation_prompt=True, return_tensors="pt"
+    ).to(model.device)
+    outputs = model.generate(inputs, max_new_tokens=20)
+    # generate() returns prompt + continuation; decode only the new tokens so
+    # the reply does not echo the whole conversation.
+    generated_text = tokenizer.decode(
+        outputs[0][inputs.shape[-1]:], skip_special_tokens=True
+    )
+    # Add model response to chat history
+    messages.append({"role": "assistant", "content": generated_text})
+
+# Always redraw so earlier turns remain visible after every rerun.
+_render_history(chat_history, messages)