Spaces:

eabybabu
/

chatbot-api

Sleeping

App Files Files Community

eabybabu committed on Mar 19, 2025

Commit

5cfc235

1 Parent(s): 4729206

Optimized chatbot for speed with CUDA & quantization

Browse files

Files changed (1) hide show

app.py +19 -18

app.py CHANGED Viewed

@@ -1,43 +1,46 @@
 import os
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 # ✅ Load API Token Securely from Hugging Face Secrets
 HF_TOKEN = os.getenv("HF_TOKEN")
-# ✅ Load model and tokenizer from Hugging Face Model Hub
 MODEL_NAME = "eabybabu/chatbot_model"  # Replace with your actual model name
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=HF_TOKEN)
-# ✅ Function to generate chatbot responses while maintaining chat history
 def chatbot_response(user_input, chat_history):
     try:
-        # Combine chat history with new query
         chat_context = " ".join([f"User: {msg}\nChatbot: {resp}" for msg, resp in chat_history])
         prompt = f"{chat_context}\nUser: {user_input}\nChatbot:"
         # Encode input
-        inputs = tokenizer.encode(prompt, return_tensors="pt")
-        # Generate response
         outputs = model.generate(
             inputs,
-            max_length=300,  # Control response length
-            temperature=0.7,  # Controls randomness
-            top_k=50,  # Limits token selection
-            top_p=0.9,  # Nucleus sampling
-            repetition_penalty=1.5,  # Prevents repetition
-            num_return_sequences=1  # Return one response
         )
         # Decode response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Clean up response (remove repeated parts)
-        response = ". ".join(set(response.split(". ")))
-        # Append new message to history
         chat_history.append((user_input, response))
         return chat_history, ""
@@ -54,10 +57,8 @@ with gr.Blocks() as demo:
     user_input = gr.Textbox(label="Type your question:")
     submit_btn = gr.Button("Ask Chatbot")
-    # Initialize chat history
     chat_history = gr.State([])
-    # Connect button to chatbot function
     submit_btn.click(chatbot_response, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
 # ✅ Launch the Gradio app

 import os
+import torch
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 # ✅ Load API Token Securely from Hugging Face Secrets
 HF_TOKEN = os.getenv("HF_TOKEN")
+# ✅ Load model and tokenizer (Optimized for Speed)
 MODEL_NAME = "eabybabu/chatbot_model"  # Replace with your actual model name
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
+# ✅ Use GPU if available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# ✅ Load model onto the selected device, then apply dynamic int8 quantization to its Linear layers
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=HF_TOKEN).to(device)
+model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)  # Apply quantization
+# ✅ Function to generate chatbot responses with chat history
 def chatbot_response(user_input, chat_history):
     try:
         chat_context = " ".join([f"User: {msg}\nChatbot: {resp}" for msg, resp in chat_history])
         prompt = f"{chat_context}\nUser: {user_input}\nChatbot:"
         # Encode input
+        inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
+        # Generate response (Faster with CUDA & Optimized Settings)
         outputs = model.generate(
             inputs,
+            max_length=200,
+            temperature=0.7,
+            top_k=50,
+            top_p=0.9,
+            repetition_penalty=1.5,
+            num_return_sequences=1
         )
         # Decode response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        response = ". ".join(set(response.split(". ")))  # Prevent repetition
         chat_history.append((user_input, response))
         return chat_history, ""
     user_input = gr.Textbox(label="Type your question:")
     submit_btn = gr.Button("Ask Chatbot")
     chat_history = gr.State([])
     submit_btn.click(chatbot_response, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
 # ✅ Launch the Gradio app