Spaces:

Prajjwalng
/

customercare

Sleeping

App Files Files Community

Prajjwalng committed on Mar 15, 2025

Commit

e3cc7e5

verified ·

1 Parent(s): 2560070

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -55

app.py CHANGED Viewed

@@ -5,7 +5,6 @@ import os
 from huggingface_hub import login
 from peft import PeftModel, PeftConfig
 import time
-import threading
 # Login with HF_TOKEN (if available)
 hf_token = os.environ.get("HF_TOKEN")
@@ -19,8 +18,8 @@ else:
     st.warning("HF_TOKEN environment variable not set. Some features may be limited.")
 # Model and Adapter Configuration
-model_id = "Prajjwalng/gemma_customer_care"
-adapter_id = "Prajjwalng/gemma_customercare_adapters"
 # Initialize model and tokenizer (load only once)
 @st.cache_resource
@@ -32,25 +31,37 @@ def load_model(model_id):
         torch_dtype=torch.float16,
         device_map={"": 0} if torch.cuda.is_available() else "cpu"
     )
     tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)
     return base_model, tokenizer
 merged_model, tokenizer = load_model(model_id)
-# Function to generate chatbot response
-def get_completion(query: str, model, tokenizer, stop_event) -> str:
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     prompt_template = f"""
 <start_of_turn>system You are a support chatbot who helps with user queries chatbot who always responds in the style of a professional.\n<end_of_turn>
-<start_of_turn>user\n\n{query}<end_of_turn>\n\n<start_of_turn>model\n"""
     prompt = prompt_template.format(query=query)
     encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
     model_inputs = encodeds.to(device)
     model.to(device)
     generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)
     decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
     model_response = decoded.split("model\n")[-1].strip()
-    stop_event.set() #signal to stop typing animation.
     return model_response
 # Streamlit app
@@ -59,61 +70,47 @@ st.title("Customer Care ChatBot")
 # Initialize chat history
 if "messages" not in st.session_state:
     st.session_state.messages = []
     initial_message = {"role": "assistant", "content": "Hi, I am Sora, I am your customer support agent."}
     st.session_state.messages.append(initial_message)
 # Display chat messages from history on app rerun
 for message in st.session_state.messages:
-    if message["role"] == "assistant":
-        with st.container():
-            col1, col2 = st.columns([1, 4])
-            with col1:
-                st.write("Agent:")
-            with col2:
-                st.markdown(message["content"])
-    else:
-        with st.container():
-            col1, col2 = st.columns([4, 1])
-            with col1:
-                st.markdown(message["content"])
-            with col2:
-                st.write("Customer:")
 # Accept user input
 if prompt := st.chat_input("How can I help you?"):
     st.session_state.messages.append({"role": "user", "content": prompt})
-    with st.container():
-        col1, col2 = st.columns([4, 1])
-        with col1:
-            st.markdown(prompt)
-        with col2:
-            st.write("Customer:")
-    with st.container():
-        col1, col2 = st.columns([1, 4])
-        with col1:
-            st.write("Agent:")
-        with col2:
-            message_placeholder = st.empty()
-            typing_placeholder = st.empty()
-            stop_event = threading.Event() # Create an event to stop the typing animation.
-            def animate_typing(placeholder, stop_event):
-                typing_dots = ""
-                while not stop_event.is_set():
-                    typing_dots += "."
-                    if len(typing_dots) > 3:
-                        typing_dots = "."
-                    placeholder.markdown(typing_dots)
-                    time.sleep(0.3)
-                placeholder.empty()
-            threading.Thread(target=animate_typing, args=(typing_placeholder, stop_event)).start() #start the typing animation.
-            full_response = ""
-            response = get_completion(prompt, merged_model, tokenizer, stop_event) #pass the stop event.
-            for chunk in response.split():
-                full_response += chunk + " "
-                time.sleep(0.05)
-                message_placeholder.markdown(full_response + "▌")
-            message_placeholder.markdown(full_response)
     st.session_state.messages.append({"role": "assistant", "content": full_response})

 from huggingface_hub import login
 from peft import PeftModel, PeftConfig
 import time
 # Login with HF_TOKEN (if available)
 hf_token = os.environ.get("HF_TOKEN")
     st.warning("HF_TOKEN environment variable not set. Some features may be limited.")
 # Model and Adapter Configuration
+model_id = "Prajjwalng/gemma_customer_care"  # Base model
+adapter_id = "Prajjwalng/gemma_customercare_adapters"  # adapter model
 # Initialize model and tokenizer (load only once)
 @st.cache_resource
         torch_dtype=torch.float16,
         device_map={"": 0} if torch.cuda.is_available() else "cpu"
     )
     tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)
     return base_model, tokenizer
 merged_model, tokenizer = load_model(model_id)
# Function to generate chatbot response using the provided template
def get_completion(query: str, model, tokenizer) -> str:
    """Generate the assistant's reply to ``query`` using a turn-tagged prompt.

    Args:
        query: The user's message. It is inserted verbatim into the user
            turn, so braces and other special characters are preserved.
        model: Object exposing ``to(device)`` and ``generate(**inputs, ...)``.
        tokenizer: Callable that encodes the prompt and returns an object with
            ``to(device)``; must also provide ``decode`` and ``eos_token_id``.

    Returns:
        The decoded text that follows the last "model" turn marker, with
        surrounding whitespace stripped.
    """
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    # The f-string below already interpolates the query. Running the result
    # through str.format() as well would re-parse any braces the user typed
    # (e.g. "{order_id}") and raise KeyError/ValueError, so it is not done.
    prompt = f"""
<start_of_turn>system You are a support chatbot who helps with user queries chatbot who always responds in the style of a professional.\n<end_of_turn>
<start_of_turn>user
{query}
<end_of_turn>
<start_of_turn>model
"""
    encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
    model_inputs = encodeds.to(device)
    model.to(device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)
    decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; keep only what follows the final
    # "model" turn marker.
    model_response = decoded.split("model\n")[-1].strip()
    return model_response
 # Streamlit app
 # Initialize chat history
 if "messages" not in st.session_state:
     st.session_state.messages = []
+    # Add initial welcome message
     initial_message = {"role": "assistant", "content": "Hi, I am Sora, I am your customer support agent."}
     st.session_state.messages.append(initial_message)
 # Display chat messages from history on app rerun
 for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
 # Accept user input
 if prompt := st.chat_input("How can I help you?"):
+    # Add user message to chat history
     st.session_state.messages.append({"role": "user", "content": prompt})
+    # Display user message in chat message container
+    with st.chat_message("user"):
+        st.markdown(prompt)
+    # Generate and display chatbot response
+    with st.chat_message("assistant"):
+        message_placeholder = st.empty()
+        typing_placeholder = st.empty()
+        typing_dots = ""  # Initialize empty string for typing dots
+        # Animate typing dots
+        for i in range(3):
+            typing_dots += "."
+            typing_placeholder.markdown(typing_dots)
+            time.sleep(0.3)  # Adjust speed as needed
+        typing_placeholder.empty()  # Clear typing dots
+        full_response = ""
+        response = get_completion(prompt, merged_model, tokenizer)
+        # Simulate stream of responses with milliseconds delay
+        for chunk in response.split():
+            full_response += chunk + " "
+            time.sleep(0.05)
+            # Add a placeholder to stream the response
+            message_placeholder.markdown(full_response + "▌")
+        message_placeholder.markdown(full_response)
+    # Add assistant response to chat history
     st.session_state.messages.append({"role": "assistant", "content": full_response})