Spaces:

odaly
/

fuzzylab

Sleeping

App Files Files Community

odaly committed on Sep 13, 2024

Commit

c9bdddd

verified ·

1 Parent(s): 12195e5

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -12

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 import streamlit as st
-import transformers
 import torch
 import time
@@ -13,22 +13,41 @@ else:
     st.error("Hugging Face API token not found. Please set the HUGGING_FACE_API_TOKEN environment variable.")
     st.stop()
 # Initialize the model and tokenizer
-model_id = "gpt2"  # Example model ID
-tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, token=hf_token)
-model = transformers.AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
 def generate_response(prompt):
-    input_ids = tokenizer.encode(prompt, return_tensors='pt')
-    output = model.generate(input_ids, max_length=150, num_return_sequences=1, do_sample=True, top_k=50, top_p=0.95)
     response = tokenizer.decode(output[0], skip_special_tokens=True)
     return response
-def response_generator(content):
-    for word in content.split():
-        yield word + " "
-        time.sleep(0.1)  # Small delay for streaming effect
 def save_chat():
     chat_dir = './Intermediate-Chats'
     if not os.path.exists(chat_dir):
@@ -61,6 +80,13 @@ def load_chat(file_path):
                 role, content = line.strip().split(': ', 1)
                 st.session_state['messages'].append({'role': role, 'content': content})
 def main():
     st.title("LLaMA Chat Interface")
@@ -82,10 +108,11 @@ def main():
         # Streaming response in the chat interface
         with st.chat_message("assistant"):
             full_response = ""
             for word in response_generator(response):
                 full_response += word
-                st.write(full_response)  # Re-write the entire content for a streaming effect
     # Sidebar functionality
     if st.sidebar.button("Save Chat"):

 import os
 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import time
     st.error("Hugging Face API token not found. Please set the HUGGING_FACE_API_TOKEN environment variable.")
     st.stop()
# Model ID (use a valid model from Hugging Face)
model_id = "gpt2"  # Replace with a valid model


@st.cache_resource
def _load_model_and_tokenizer(repo_id, auth_token):
    """Load the tokenizer and causal language model for ``repo_id``.

    Wrapped in ``st.cache_resource`` so the objects are built once and reused
    across Streamlit script reruns instead of being reloaded each time the
    script executes.

    Args:
        repo_id: Hugging Face model identifier to load.
        auth_token: Hugging Face API token forwarded to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # `token` is the current keyword; `use_auth_token` is deprecated.
    loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id, token=auth_token)
    loaded_model = AutoModelForCausalLM.from_pretrained(repo_id, token=auth_token)
    # If the tokenizer defines no pad token, reuse EOS so padding works
    # without adding a new token (which would require resizing embeddings).
    if loaded_tokenizer.pad_token is None:
        loaded_tokenizer.pad_token = loaded_tokenizer.eos_token
    return loaded_tokenizer, loaded_model


# Initialize the model and tokenizer
tokenizer, model = _load_model_and_tokenizer(model_id, hf_token)
 def generate_response(prompt):
     """Generate one sampled text sequence for ``prompt``.

     Uses the module-level ``tokenizer`` and ``model``.

     Args:
         prompt: Text to tokenize and feed to the model.

     Returns:
         The first generated sequence decoded to a string, with special
         tokens removed.

     NOTE(review): for causal LMs ``generate`` presumably returns the prompt
     tokens followed by the continuation, so the result likely echoes the
     prompt, and ``max_length=150`` likely counts the prompt tokens too --
     confirm against the transformers documentation.
     """
     # Tokenize the prompt; the result carries both input_ids and attention_mask.
     inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True)
     output = model.generate(
         inputs['input_ids'],
         attention_mask=inputs['attention_mask'],  # Pass attention mask to prevent the warning
         # Setting tokenizer.pad_token alone does not reach generate(); pass the
         # id explicitly so generation does not have to pick a fallback.
         pad_token_id=tokenizer.pad_token_id,
         max_length=150,
         num_return_sequences=1,
         do_sample=True,
         top_k=50,
         top_p=0.95,
     )
     # Decode the generated output
     return tokenizer.decode(output[0], skip_special_tokens=True)
 def save_chat():
     chat_dir = './Intermediate-Chats'
     if not os.path.exists(chat_dir):
                 role, content = line.strip().split(': ', 1)
                 st.session_state['messages'].append({'role': role, 'content': content})
def response_generator(content, delay=0.2):
    """Yield ``content`` word by word for a streaming display.

    Each yielded item is a delta, not the text so far: the first word is
    yielded as-is and every later word is prefixed with a single space.
    Callers that accumulate the pieces with ``+=`` therefore rebuild the
    whitespace-normalized text without duplicating earlier words.

    Args:
        content: Text to stream; it is split on whitespace.
        delay: Seconds to pause after each yielded word. Values <= 0 disable
            the pause.

    Yields:
        The next chunk of text to append to what has been shown so far.
    """
    for index, word in enumerate(content.split()):
        # Lead with the separator so the accumulated text has no trailing space.
        yield word if index == 0 else " " + word
        if delay > 0:
            time.sleep(delay)
 def main():
     st.title("LLaMA Chat Interface")
         # Streaming response in the chat interface
         with st.chat_message("assistant"):
+            placeholder = st.empty()
             full_response = ""
             for word in response_generator(response):
                 full_response += word
+                placeholder.write(full_response)
     # Sidebar functionality
     if st.sidebar.button("Save Chat"):