Update app.py
app.py (CHANGED)
@@ -5,6 +5,7 @@ import os
 from huggingface_hub import login
 from peft import PeftModel, PeftConfig
 import time
+import threading
 
 # Login with HF_TOKEN (if available)
 hf_token = os.environ.get("HF_TOKEN")
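Note: the new threading import is consumed later in this commit, by the typing-indicator worker thread and the threading.Event used to stop it.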
@@ -18,8 +19,8 @@ else:
     st.warning("HF_TOKEN environment variable not set. Some features may be limited.")
 
 # Model and Adapter Configuration
-model_id = "Prajjwalng/gemma_customer_care"
-adapter_id = "Prajjwalng/gemma_customercare_adapters"
+model_id = "Prajjwalng/gemma_customer_care"
+adapter_id = "Prajjwalng/gemma_customercare_adapters"
 
 # Initialize model and tokenizer (load only once)
 @st.cache_resource
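Note: the removed and re-added model_id/adapter_id lines look identical, so the change here is presumably whitespace-only. More importantly, adapter_id is defined and PeftModel/PeftConfig are imported, yet no visible hunk ever loads the adapter. If the intent is to apply it, the usual peft pattern looks like this (a hypothetical sketch, not part of this commit; base_model is the model built inside load_model):

    from peft import PeftModel

    peft_model = PeftModel.from_pretrained(base_model, adapter_id)  # attach the LoRA adapter weights
    model = peft_model.merge_and_unload()  # fold the adapter into the base weights for inference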
@@ -31,37 +32,25 @@ def load_model(model_id):
         torch_dtype=torch.float16,
         device_map={"": 0} if torch.cuda.is_available() else "cpu"
     )
-
     tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)
     return base_model, tokenizer
 
 merged_model, tokenizer = load_model(model_id)
 
-# Function to generate chatbot response
-def get_completion(query: str, model, tokenizer) -> str:
+# Function to generate chatbot response
+def get_completion(query: str, model, tokenizer, stop_event) -> str:
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
-
     prompt_template = f"""
 <start_of_turn>system You are a support chatbot who helps with user queries chatbot who always responds in the style of a professional.\n<end_of_turn>
-<start_of_turn>user
-
-
-{query}
-<end_of_turn>
-
-<start_of_turn>model
-"""
+<start_of_turn>user\n\n{query}<end_of_turn>\n\n<start_of_turn>model\n"""
     prompt = prompt_template.format(query=query)
-
     encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
-
     model_inputs = encodeds.to(device)
-
     model.to(device)
-
     generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)
     decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
     model_response = decoded.split("model\n")[-1].strip()
+    stop_event.set()  # signal to stop the typing animation
     return model_response
 
 # Streamlit app
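Note: two details in this hunk deserve a flag. First, prompt_template is an f-string, so {query} is interpolated the moment the string is built; the later prompt_template.format(query=query) is a no-op at best, and raises if the user's text happens to contain braces. Second, the new stop_event parameter is one half of a simple cross-thread handshake: the main script thread runs generation and calls stop_event.set() when it finishes, while the animation thread polls stop_event.is_set(). A minimal standard-library sketch of that handshake (hypothetical names, not part of this commit):

    import threading
    import time

    stop_event = threading.Event()

    def animate(stop_event):
        while not stop_event.is_set():  # poll until generation signals completion
            time.sleep(0.3)             # the real app redraws the "..." indicator here

    threading.Thread(target=animate, args=(stop_event,)).start()
    time.sleep(1.0)   # stands in for model.generate() on the main thread
    stop_event.set()  # lets the animation thread exit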
@@ -70,47 +59,61 @@ st.title("Customer Care ChatBot")
 # Initialize chat history
 if "messages" not in st.session_state:
     st.session_state.messages = []
-    # Add initial welcome message
     initial_message = {"role": "assistant", "content": "Hi, I am Sora, I am your customer support agent."}
     st.session_state.messages.append(initial_message)
 
 # Display chat messages from history on app rerun
 for message in st.session_state.messages:
-
-    st.…
+    if message["role"] == "assistant":
+        with st.container():
+            col1, col2 = st.columns([1, 4])
+            with col1:
+                st.write("Agent:")
+            with col2:
+                st.markdown(message["content"])
+    else:
+        with st.container():
+            col1, col2 = st.columns([4, 1])
+            with col1:
+                st.markdown(message["content"])
+            with col2:
+                st.write("Customer:")
 
 # Accept user input
 if prompt := st.chat_input("How can I help you?"):
-    # Add user message to chat history
     st.session_state.messages.append({"role": "user", "content": prompt})
-    … (30 removed lines, illegible in the source)
+    with st.container():
+        col1, col2 = st.columns([4, 1])
+        with col1:
+            st.markdown(prompt)
+        with col2:
+            st.write("Customer:")
+    with st.container():
+        col1, col2 = st.columns([1, 4])
+        with col1:
+            st.write("Agent:")
+        with col2:
+            message_placeholder = st.empty()
+            typing_placeholder = st.empty()
+            stop_event = threading.Event()  # event used to stop the typing animation
+
+            def animate_typing(placeholder, stop_event):
+                typing_dots = ""
+                while not stop_event.is_set():
+                    typing_dots += "."
+                    if len(typing_dots) > 3:
+                        typing_dots = "."
+                    placeholder.markdown(typing_dots)
+                    time.sleep(0.3)
+                placeholder.empty()
+
+            threading.Thread(target=animate_typing, args=(typing_placeholder, stop_event)).start()  # start the typing animation
+
+            full_response = ""
+            response = get_completion(prompt, merged_model, tokenizer, stop_event)  # pass the stop event
+            for chunk in response.split():
+                full_response += chunk + " "
+                time.sleep(0.05)
+                message_placeholder.markdown(full_response + "▌")
+            message_placeholder.markdown(full_response)
     st.session_state.messages.append({"role": "assistant", "content": full_response})
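Note: Streamlit calls made from a plain worker thread, like the placeholder.markdown inside animate_typing, typically trigger "missing ScriptRunContext" warnings and may not render at all. In recent Streamlit versions the script-run context can be attached to the thread before it starts; a possible refinement, not part of this commit:

    import threading
    from streamlit.runtime.scriptrunner import add_script_run_ctx

    t = threading.Thread(target=animate_typing, args=(typing_placeholder, stop_event))
    add_script_run_ctx(t)  # hand the current ScriptRunContext to the worker thread
    t.start()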