AiCoderv2 committed on
Commit
6558d3e
·
verified ·
1 Parent(s): 7c2a0f5

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +58 -33
app.py CHANGED
@@ -5,18 +5,18 @@ from typing import List, Dict
5
 
6
  class ChatbotHandler:
7
  def __init__(self):
8
- self.model_name = "microsoft/DialoGPT-large" # Large conversational model
9
  self.tokenizer = None
10
  self.model = None
11
  self.chat_pipeline = None
12
  self.max_length = 1000
13
  self.temperature = 0.7
14
  self.model_loaded = False
15
- self.system_prompt = """You are a helpful, friendly, and knowledgeable AI assistant.
16
- You provide clear, accurate, and thoughtful responses. You are engaging and try to be
17
- helpful while being honest about your limitations. Always maintain a positive and
18
  supportive tone in your conversations."""
19
-
20
  # Initialize the model
21
  self.initialize_model()
22
 
@@ -29,24 +29,30 @@ class ChatbotHandler:
29
  return False
30
 
31
  try:
32
- print("Loading DialoGPT-large model... This may take several minutes.")
33
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
34
- self.model = AutoModelForCausalLM.from_pretrained(self.model_name)
35
-
 
 
 
 
 
36
  # Set pad token if not present
37
  if self.tokenizer.pad_token is None:
38
  self.tokenizer.pad_token = self.tokenizer.eos_token
39
-
40
  # Create pipeline for text generation
41
  self.chat_pipeline = pipeline(
42
  "text-generation",
43
  model=self.model,
44
  tokenizer=self.tokenizer,
45
- device=0 if os.getenv("CUDA_VISIBLE_DEVICES") else -1, # Use GPU if available
46
  max_length=self.max_length,
47
  temperature=self.temperature,
48
  do_sample=True,
49
- pad_token_id=self.tokenizer.eos_token_id
 
50
  )
51
  print("Model loaded successfully!")
52
  self.model_loaded = True
@@ -64,8 +70,8 @@ class ChatbotHandler:
64
  # Prepare conversation history as a single string
65
  conversation = self.system_prompt + "\n"
66
 
67
- # Add recent history (limit to last 5 exchanges to avoid context limits)
68
- for msg in history[-5:]:
69
  if msg["role"] == "user":
70
  conversation += f"User: {msg['content']}\n"
71
  elif msg["role"] == "assistant":
@@ -74,12 +80,14 @@ class ChatbotHandler:
74
  # Add current message
75
  conversation += f"User: {message}\nAssistant:"
76
 
77
- # Generate response
78
  outputs = self.chat_pipeline(
79
  conversation,
80
- max_new_tokens=150, # Limit response length
81
  num_return_sequences=1,
82
- return_full_text=False
 
 
83
  )
84
 
85
  response = outputs[0]['generated_text'].strip()
@@ -87,6 +95,12 @@ class ChatbotHandler:
87
  # Clean up response (remove any unwanted prefixes)
88
  if response.startswith("Assistant:"):
89
  response = response[10:].strip()
 
 
 
 
 
 
90
 
91
  # Simulate streaming by yielding chunks
92
  words = response.split()
@@ -94,10 +108,10 @@ class ChatbotHandler:
94
  for word in words:
95
  current_response += word + " "
96
  yield current_response.strip()
97
- time.sleep(0.05) # Small delay for streaming effect
98
 
99
  except Exception as e:
100
- yield f"Error generating response: {str(e)}"
101
 
102
  # Initialize chatbot handler
103
  chat_handler = ChatbotHandler()
@@ -107,22 +121,32 @@ def respond_stream(message: str, history: List[Dict]):
107
  if not message.strip():
108
  return "", history
109
 
 
 
 
110
  # Check if model is initialized
111
  if not chat_handler.chat_pipeline:
112
- return "", history + [{"role": "assistant", "content": "The chatbot is loading the model. Please wait a moment and try again."}]
113
-
114
- # Add user message
115
- history.append({"role": "user", "content": message})
116
 
117
  # Get streaming response
118
  full_response = ""
119
- for chunk in chat_handler.get_response(message, history[:-1]): # Don't include current user message in history
120
- full_response = chunk
121
- # Update the last assistant message
 
 
 
 
 
 
 
 
 
122
  if len(history) > 0 and history[-1].get("role") == "assistant":
123
- history[-1]["content"] = full_response
124
  else:
125
- history.append({"role": "assistant", "content": full_response})
126
  yield "", history
127
 
128
  def clear_history():
@@ -136,13 +160,13 @@ def update_model_settings(temp, max_len):
136
  return f"Settings updated: temp={temp}, max_length={max_len}"
137
 
138
  # Create the interface
139
- with gr.Blocks(theme=gr.themes.Soft(), title="AI Chatbot with DialoGPT") as demo:
140
 
141
  # Header
142
  gr.HTML("""
143
  <div style='text-align: center; padding: 20px;'>
144
  <h1>🤖 AI Chatbot</h1>
145
- <p style='color: #666;'>Powered by DialoGPT-Large • Built with <a href='https://huggingface.co/spaces/akhaliq/anycoder' target='_blank' style='color: #007bff; text-decoration: none;'>anycoder</a></p>
146
  </div>
147
  """)
148
 
@@ -151,8 +175,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Chatbot with DialoGPT") as demo
151
  status_msg = "✅ Chatbot is ready! Start chatting below."
152
  status_color = "#28a745"
153
  else:
154
- status_msg = " Model failed to load. Please check the console for details."
155
- status_color = "#dc3545"
156
 
157
  gr.HTML(f"""
158
  <div style='text-align: center; padding: 10px; background-color: {status_color}15; border: 1px solid {status_color}30; border-radius: 5px; margin: 10px 0;'>
@@ -223,7 +247,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Chatbot with DialoGPT") as demo
223
  # Footer
224
  gr.HTML("""
225
  <div style='text-align: center; padding: 10px; color: #888; font-size: 0.9em;'>
226
- <p>This chatbot uses Microsoft's DialoGPT-Large model from Hugging Face. It's completely free to use!</p>
 
227
  </div>
228
  """)
229
 
 
5
 
6
  class ChatbotHandler:
7
  def __init__(self):
8
+ self.model_name = "facebook/opt-13b" # 13B parameter model (close to 15B)
9
  self.tokenizer = None
10
  self.model = None
11
  self.chat_pipeline = None
12
  self.max_length = 1000
13
  self.temperature = 0.7
14
  self.model_loaded = False
15
+ self.system_prompt = """You are a helpful, friendly, and knowledgeable AI assistant.
16
+ You provide clear, accurate, and thoughtful responses. You are engaging and try to be
17
+ helpful while being honest about your limitations. Always maintain a positive and
18
  supportive tone in your conversations."""
19
+
20
  # Initialize the model
21
  self.initialize_model()
22
 
 
29
  return False
30
 
31
  try:
32
+ print("Loading OPT-13B model... This may take a very long time and require significant memory.")
33
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=True)
34
+ self.model = AutoModelForCausalLM.from_pretrained(
35
+ self.model_name,
36
+ device_map="auto", # Automatically distribute across available GPUs
37
+ torch_dtype="auto",
38
+ low_cpu_mem_usage=True
39
+ )
40
+
41
  # Set pad token if not present
42
  if self.tokenizer.pad_token is None:
43
  self.tokenizer.pad_token = self.tokenizer.eos_token
44
+
45
  # Create pipeline for text generation
46
  self.chat_pipeline = pipeline(
47
  "text-generation",
48
  model=self.model,
49
  tokenizer=self.tokenizer,
50
+ device_map="auto",
51
  max_length=self.max_length,
52
  temperature=self.temperature,
53
  do_sample=True,
54
+ pad_token_id=self.tokenizer.eos_token_id,
55
+ truncation=True
56
  )
57
  print("Model loaded successfully!")
58
  self.model_loaded = True
 
70
  # Prepare conversation history as a single string
71
  conversation = self.system_prompt + "\n"
72
 
73
+ # Add recent history (limit to last 3 exchanges to save memory)
74
+ for msg in history[-3:]:
75
  if msg["role"] == "user":
76
  conversation += f"User: {msg['content']}\n"
77
  elif msg["role"] == "assistant":
 
80
  # Add current message
81
  conversation += f"User: {message}\nAssistant:"
82
 
83
+ # Generate response with memory constraints
84
  outputs = self.chat_pipeline(
85
  conversation,
86
+ max_new_tokens=100, # Shorter responses to save memory
87
  num_return_sequences=1,
88
+ return_full_text=False,
89
+ do_sample=True,
90
+ temperature=self.temperature
91
  )
92
 
93
  response = outputs[0]['generated_text'].strip()
 
95
  # Clean up response (remove any unwanted prefixes)
96
  if response.startswith("Assistant:"):
97
  response = response[10:].strip()
98
+ elif response.startswith("User:"):
99
+ response = "I apologize, but I seem to have gotten confused. How can I help you?"
100
+
101
+ # Limit response length
102
+ if len(response) > 500:
103
+ response = response[:500] + "..."
104
 
105
  # Simulate streaming by yielding chunks
106
  words = response.split()
 
108
  for word in words:
109
  current_response += word + " "
110
  yield current_response.strip()
111
+ time.sleep(0.02) # Faster streaming
112
 
113
  except Exception as e:
114
+ yield f"I apologize, but I encountered an error generating a response. Please try asking your question again. Error: {str(e)}"
115
 
116
  # Initialize chatbot handler
117
  chat_handler = ChatbotHandler()
 
121
  if not message.strip():
122
  return "", history
123
 
124
+ # Always add user message first to prevent disappearing chats
125
+ history = history + [{"role": "user", "content": message}]
126
+
127
  # Check if model is initialized
128
  if not chat_handler.chat_pipeline:
129
+ history = history + [{"role": "assistant", "content": "The chatbot model is still loading. Please wait a moment and try again."}]
130
+ return "", history
 
 
131
 
132
  # Get streaming response
133
  full_response = ""
134
+ try:
135
+ for chunk in chat_handler.get_response(message, history[:-1]): # Don't include current user message in context
136
+ full_response = chunk
137
+ # Update the last assistant message
138
+ if len(history) > 0 and history[-1].get("role") == "assistant":
139
+ history[-1]["content"] = full_response
140
+ else:
141
+ history = history + [{"role": "assistant", "content": full_response}]
142
+ yield "", history
143
+ except Exception as e:
144
+ # If streaming fails, add a fallback response
145
+ error_msg = "I apologize, but I encountered an error. Please try again."
146
  if len(history) > 0 and history[-1].get("role") == "assistant":
147
+ history[-1]["content"] = error_msg
148
  else:
149
+ history = history + [{"role": "assistant", "content": error_msg}]
150
  yield "", history
151
 
152
  def clear_history():
 
160
  return f"Settings updated: temp={temp}, max_length={max_len}"
161
 
162
  # Create the interface
163
+ with gr.Blocks(theme=gr.themes.Soft(), title="AI Chatbot with OPT-13B") as demo:
164
 
165
  # Header
166
  gr.HTML("""
167
  <div style='text-align: center; padding: 20px;'>
168
  <h1>🤖 AI Chatbot</h1>
169
+ <p style='color: #666;'>Powered by OPT-13B (13B parameters) • Built with <a href='https://huggingface.co/spaces/akhaliq/anycoder' target='_blank' style='color: #007bff; text-decoration: none;'>anycoder</a></p>
170
  </div>
171
  """)
172
 
 
175
  status_msg = "✅ Chatbot is ready! Start chatting below."
176
  status_color = "#28a745"
177
  else:
178
+ status_msg = " Loading OPT-13B model... This may take 10-20 minutes and requires significant memory."
179
+ status_color = "#ffc107"
180
 
181
  gr.HTML(f"""
182
  <div style='text-align: center; padding: 10px; background-color: {status_color}15; border: 1px solid {status_color}30; border-radius: 5px; margin: 10px 0;'>
 
247
  # Footer
248
  gr.HTML("""
249
  <div style='text-align: center; padding: 10px; color: #888; font-size: 0.9em;'>
250
+ <p>This chatbot uses Meta's OPT-13B model (13 billion parameters) from Hugging Face. It's completely free to use!</p>
251
+ <p><strong>Note:</strong> This large model requires significant computational resources and may take time to load and respond.</p>
252
  </div>
253
  """)
254