Upload app.py with huggingface_hub
app.py CHANGED
@@ -5,18 +5,18 @@ from typing import List, Dict
 
 class ChatbotHandler:
     def __init__(self):
-        self.model_name = "
+        self.model_name = "facebook/opt-13b"  # 13B parameter model (close to 15B)
         self.tokenizer = None
         self.model = None
         self.chat_pipeline = None
         self.max_length = 1000
         self.temperature = 0.7
         self.model_loaded = False
-        self.system_prompt = """You are a helpful, friendly, and knowledgeable AI assistant.
-        You provide clear, accurate, and thoughtful responses. You are engaging and try to be
-        helpful while being honest about your limitations. Always maintain a positive and
+        self.system_prompt = """You are a helpful, friendly, and knowledgeable AI assistant.
+        You provide clear, accurate, and thoughtful responses. You are engaging and try to be
+        helpful while being honest about your limitations. Always maintain a positive and
         supportive tone in your conversations."""
-
+
         # Initialize the model
         self.initialize_model()
 
@@ -29,24 +29,30 @@ class ChatbotHandler:
             return False
 
         try:
-            print("Loading
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-            self.model = AutoModelForCausalLM.from_pretrained(
-
+            print("Loading OPT-13B model... This may take a very long time and require significant memory.")
+            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=True)
+            self.model = AutoModelForCausalLM.from_pretrained(
+                self.model_name,
+                device_map="auto",  # Automatically distribute across available GPUs
+                torch_dtype="auto",
+                low_cpu_mem_usage=True
+            )
+
             # Set pad token if not present
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
-
+
             # Create pipeline for text generation
             self.chat_pipeline = pipeline(
                 "text-generation",
                 model=self.model,
                 tokenizer=self.tokenizer,
-
+                device_map="auto",
                 max_length=self.max_length,
                 temperature=self.temperature,
                 do_sample=True,
-                pad_token_id=self.tokenizer.eos_token_id
+                pad_token_id=self.tokenizer.eos_token_id,
+                truncation=True
             )
             print("Model loaded successfully!")
             self.model_loaded = True
@@ -64,8 +70,8 @@ class ChatbotHandler:
             # Prepare conversation history as a single string
             conversation = self.system_prompt + "\n"
 
-            # Add recent history (limit to last
-            for msg in history[-
+            # Add recent history (limit to last 3 exchanges to save memory)
+            for msg in history[-3:]:
                 if msg["role"] == "user":
                     conversation += f"User: {msg['content']}\n"
                 elif msg["role"] == "assistant":
@@ -74,12 +80,14 @@ class ChatbotHandler:
             # Add current message
             conversation += f"User: {message}\nAssistant:"
 
-            # Generate response
+            # Generate response with memory constraints
             outputs = self.chat_pipeline(
                 conversation,
-                max_new_tokens=
+                max_new_tokens=100,  # Shorter responses to save memory
                 num_return_sequences=1,
-                return_full_text=False
+                return_full_text=False,
+                do_sample=True,
+                temperature=self.temperature
             )
 
             response = outputs[0]['generated_text'].strip()
@@ -87,6 +95,12 @@ class ChatbotHandler:
             # Clean up response (remove any unwanted prefixes)
             if response.startswith("Assistant:"):
                 response = response[10:].strip()
+            elif response.startswith("User:"):
+                response = "I apologize, but I seem to have gotten confused. How can I help you?"
+
+            # Limit response length
+            if len(response) > 500:
+                response = response[:500] + "..."
 
             # Simulate streaming by yielding chunks
             words = response.split()
@@ -94,10 +108,10 @@ class ChatbotHandler:
             for word in words:
                 current_response += word + " "
                 yield current_response.strip()
-                time.sleep(0.
+                time.sleep(0.02)  # Faster streaming
 
         except Exception as e:
-            yield f"
+            yield f"I apologize, but I encountered an error generating a response. Please try asking your question again. Error: {str(e)}"
 
 # Initialize chatbot handler
 chat_handler = ChatbotHandler()
@@ -107,22 +121,32 @@ def respond_stream(message: str, history: List[Dict]):
     if not message.strip():
         return "", history
 
+    # Always add user message first to prevent disappearing chats
+    history = history + [{"role": "user", "content": message}]
+
     # Check if model is initialized
     if not chat_handler.chat_pipeline:
-
-
-    # Add user message
-    history.append({"role": "user", "content": message})
+        history = history + [{"role": "assistant", "content": "The chatbot model is still loading. Please wait a moment and try again."}]
+        return "", history
 
     # Get streaming response
    full_response = ""
-
-
-
+    try:
+        for chunk in chat_handler.get_response(message, history[:-1]):  # Don't include current user message in context
+            full_response = chunk
+            # Update the last assistant message
+            if len(history) > 0 and history[-1].get("role") == "assistant":
+                history[-1]["content"] = full_response
+            else:
+                history = history + [{"role": "assistant", "content": full_response}]
+            yield "", history
+    except Exception as e:
+        # If streaming fails, add a fallback response
+        error_msg = "I apologize, but I encountered an error. Please try again."
         if len(history) > 0 and history[-1].get("role") == "assistant":
-            history[-1]["content"] =
+            history[-1]["content"] = error_msg
         else:
-            history
+            history = history + [{"role": "assistant", "content": error_msg}]
        yield "", history
 
 def clear_history():
@@ -136,13 +160,13 @@ def update_model_settings(temp, max_len):
     return f"Settings updated: temp={temp}, max_length=24,098"
 
 # Create the interface
-with gr.Blocks(theme=gr.themes.Soft(), title="AI Chatbot with DialoGPT") as demo:
+with gr.Blocks(theme=gr.themes.Soft(), title="AI Chatbot with OPT-13B") as demo:
 
     # Header
     gr.HTML("""
     <div style='text-align: center; padding: 20px;'>
         <h1>🤖 AI Chatbot</h1>
-        <p style='color: #666;'>Powered by
+        <p style='color: #666;'>Powered by OPT-13B (13B parameters) • Built with <a href='https://huggingface.co/spaces/akhaliq/anycoder' target='_blank' style='color: #007bff; text-decoration: none;'>anycoder</a></p>
     </div>
     """)
 
@@ -151,8 +175,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Chatbot with DialoGPT") as demo
         status_msg = "✅ Chatbot is ready! Start chatting below."
         status_color = "#28a745"
     else:
-        status_msg = "
-        status_color = "#
+        status_msg = "⏳ Loading OPT-13B model... This may take 10-20 minutes and requires significant memory."
+        status_color = "#ffc107"
 
     gr.HTML(f"""
     <div style='text-align: center; padding: 10px; background-color: {status_color}15; border: 1px solid {status_color}30; border-radius: 5px; margin: 10px 0;'>
@@ -223,7 +247,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Chatbot with DialoGPT") as demo
     # Footer
     gr.HTML("""
    <div style='text-align: center; padding: 10px; color: #888; font-size: 0.9em;'>
-        <p>This chatbot uses
+        <p>This chatbot uses Meta's OPT-13B model (13 billion parameters) from Hugging Face. It's completely free to use!</p>
+        <p><strong>Note:</strong> This large model requires significant computational resources and may take time to load and respond.</p>
     </div>
     """)
 
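For reference, the new loading path can be exercised on its own, outside the Space. The sketch below is not part of the commit: it assumes transformers, torch, and accelerate are installed (device_map="auto" depends on accelerate) and that enough accelerator memory is available, since facebook/opt-13b is roughly 26 GB of weights in fp16.

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "facebook/opt-13b"  # swap in facebook/opt-1.3b to smoke-test on small hardware

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",       # shard across available GPUs; requires accelerate
    torch_dtype="auto",      # keep the checkpoint's stored dtype (fp16) instead of upcasting to fp32
    low_cpu_mem_usage=True,  # avoid materializing a second full copy of the weights in RAM
)

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=True,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id,
)

out = generator("User: Hello!\nAssistant:", max_new_tokens=50, return_full_text=False)
print(out[0]["generated_text"])

Because the OPT checkpoints share one interface, pointing model_name at a smaller variant changes nothing else in ChatbotHandler, which is the cheapest way to verify the Space's logic before provisioning 13B-class hardware.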