Frankie-walsh4 committed on
Commit
e21cbb5
·
1 Parent(s): bca9833

change for AI thinking

Browse files
Files changed (1) hide show
  1. app.py +146 -161
app.py CHANGED
@@ -3,76 +3,101 @@ from huggingface_hub import InferenceClient
3
  import time
4
  import html
5
  import re
 
6
 
7
  """
8
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
9
  """
10
  client = InferenceClient("Trinoid/Data_Management")
11
 
12
- def clean_response(text):
13
- """Clean up response by removing meta-text, thinking artifacts, and repetitive content"""
14
- # Remove thinking phrases
15
- thinking_patterns = [
16
- r"I need to figure out",
17
- r"I'll start by",
18
- r"Let me try to",
19
- r"I'm trying to understand",
20
- r"First, I know that",
21
- r"I'll need to look into",
22
- r"I'm not entirely sure",
23
- r"I believe this is",
24
- r"I imagine it involves",
25
- r"Okay, so I need to",
26
- r"From what I know,",
27
- r"One of the main reasons to",
28
- r"Another reason to",
29
- ]
 
 
 
 
 
 
30
 
31
- for pattern in thinking_patterns:
32
- text = re.sub(pattern, "", text, flags=re.IGNORECASE)
 
33
 
34
- # Split into paragraphs for deduplication
35
- paragraphs = text.split('\n\n')
 
36
 
37
- # Only keep meaningful paragraphs
 
 
 
38
  filtered_paragraphs = []
39
- for p in paragraphs:
40
- if p and len(p.strip()) > 20: # Only include non-empty paragraphs with substance
41
- filtered_paragraphs.append(p)
 
 
42
 
43
- # Remove similar paragraphs (not just exact duplicates)
44
  unique_paragraphs = []
45
- for i, current_para in enumerate(filtered_paragraphs):
46
- is_duplicate = False
47
-
48
- # Convert to lowercase and remove punctuation for comparison
49
- clean_current = re.sub(r'[^\w\s]', '', current_para.lower())
50
 
51
- for prev_para in unique_paragraphs:
52
- # Clean previous paragraph too
53
- clean_prev = re.sub(r'[^\w\s]', '', prev_para.lower())
54
-
55
- # Check for similarity using character overlap
56
- # If more than 50% of words match, consider it similar
57
- words_current = set(clean_current.split())
58
- words_prev = set(clean_prev.split())
59
 
60
- if len(words_current) > 0 and len(words_prev) > 0:
61
- common_words = words_current.intersection(words_prev)
62
- similarity = len(common_words) / min(len(words_current), len(words_prev))
 
63
 
64
- if similarity > 0.5: # If more than 50% similar, it's a duplicate concept
65
  is_duplicate = True
66
  break
67
 
68
  if not is_duplicate:
69
- unique_paragraphs.append(current_para)
70
 
71
- # Add a summary paragraph if we removed a lot
72
- if len(unique_paragraphs) < len(filtered_paragraphs) / 2:
73
- unique_paragraphs.append("Note: Some repetitive content has been removed from this response for clarity.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- return '\n\n'.join(unique_paragraphs)
76
 
77
  def respond(
78
  message,
@@ -82,25 +107,36 @@ def respond(
82
  temperature,
83
  top_p,
84
  ):
85
- # Create a more structured system prompt with strict instructions about repetition
86
- enhanced_system_message = f"""
 
87
  {system_message}
88
 
89
- CRITICAL INSTRUCTIONS FOR YOUR RESPONSES:
90
- 1. PROVIDE DIRECT, AUTHORITATIVE, AND COMPLETE ANSWERS ABOUT MICROSOFT 365 AND DATA MANAGEMENT.
91
- 2. DO NOT USE PHRASES LIKE "I think", "I believe", "I'm not sure", "I'll try to", "First, I need to".
92
- 3. DO NOT INCLUDE YOUR THINKING PROCESS IN RESPONSES.
93
- 4. USE CLEAR STRUCTURE WITH HEADINGS AND BULLET POINTS WHERE APPROPRIATE.
94
- 5. NEVER REPEAT THE SAME INFORMATION IN DIFFERENT WORDS.
95
- 6. MENTION EACH CONCEPT EXACTLY ONCE - DO NOT ELABORATE ON THE SAME IDEA MULTIPLE TIMES.
96
- 7. WHEN ANSWERING QUESTIONS ABOUT DOCUMENT MANAGEMENT, PROVIDE SPECIFIC DETAILS ABOUT THE ACTUAL TOOLS AND FEATURES.
97
- 8. LIMIT YOUR RESPONSE LENGTH TO WHAT IS NECESSARY - BE CONCISE.
98
- 9. WHEN GIVING EXAMPLES, PROVIDE ONE CLEAR EXAMPLE RATHER THAN MULTIPLE SIMILAR ONES.
99
- 10. ANSWER AS A MICROSOFT 365 EXPERT WITH AUTHORITATIVE KNOWLEDGE.
100
- """
 
 
 
 
 
 
 
 
 
101
 
102
  messages = [{"role": "system", "content": enhanced_system_message}]
103
 
 
104
  for val in history:
105
  if val[0]:
106
  messages.append({"role": "user", "content": val[0]})
@@ -109,14 +145,14 @@ CRITICAL INSTRUCTIONS FOR YOUR RESPONSES:
109
 
110
  messages.append({"role": "user", "content": message})
111
 
112
- # Track generation state
113
- thinking_steps = []
114
  full_response = ""
 
115
  start_time = time.time()
116
- last_token_time = time.time()
117
 
118
  try:
119
- # Use chat completion
120
  for message in client.chat_completion(
121
  messages,
122
  max_tokens=max_tokens,
@@ -125,49 +161,46 @@ CRITICAL INSTRUCTIONS FOR YOUR RESPONSES:
125
  top_p=top_p,
126
  ):
127
  token = message.choices[0].delta.content
 
 
128
  if not token:
129
- # Check for long pause between tokens (potential stall)
130
- current_time = time.time()
131
- if current_time - last_token_time > 5: # 5 second timeout
132
- if full_response:
133
- break
134
  continue
135
-
136
- last_token_time = time.time()
137
  full_response += token
138
 
139
- # Save thinking steps for display only
140
- current_time = time.time()
141
- if current_time - start_time > 2 or len(full_response) % 200 == 0:
142
- start_time = current_time
143
  thinking_steps.append(full_response)
144
 
145
- # Format with thinking history as HTML
146
- if thinking_steps and len(thinking_steps) > 1:
 
147
  thinking_html = '<div class="thinking-wrapper"><details><summary>Show thinking process</summary><div class="thinking-steps">'
148
- for i, step in enumerate(thinking_steps[:-1]):
149
  safe_step = html.escape(step)
150
  thinking_html += f'<div class="thinking-step">Step {i+1}: {safe_step}</div>'
151
  thinking_html += '</div></details></div>'
152
-
153
- # Always yield the full current response (no cleaning during generation)
154
- yield f"{thinking_html}{full_response}"
155
- else:
156
- yield full_response
 
157
 
158
- # Clean up the final response to remove thinking artifacts
159
- if "I'm trying to understand" in full_response or "I need to figure out" in full_response:
160
- cleaned_response = clean_response(full_response)
161
  thinking_html = '<div class="thinking-wrapper"><details><summary>Show original response</summary><div class="thinking-steps">'
162
  thinking_html += f'<div class="thinking-step">{html.escape(full_response)}</div>'
163
  thinking_html += '</div></details></div>'
 
164
 
165
- yield f"{thinking_html}{cleaned_response}"
166
-
167
  except Exception as e:
168
- # Handle exceptions gracefully
169
- error_message = f"I apologize, but I encountered an error while generating a response. Please try rephrasing your question or asking something else."
170
- yield error_message
171
 
172
 
173
  # Custom CSS for Plant Wisdom.AI styling
@@ -205,83 +238,35 @@ custom_css = """
205
  margin-right: 15%;
206
  border: 1px solid #e0ede0;
207
  color: #2c3338;
208
- transition: opacity 0.5s ease;
209
  }
210
 
211
- .submit-btn {
212
- background-color: #2e7d32 !important;
213
- color: white !important;
214
- border-radius: 8px !important;
215
- padding: 12px 24px !important;
216
- font-weight: 600 !important;
217
- font-size: 16px !important;
218
- transition: all 0.3s ease !important;
219
- border: none !important;
220
  }
221
 
222
- .submit-btn:hover {
223
- background-color: #1b5e20 !important;
224
- transform: translateY(-1px) !important;
225
- box-shadow: 0 4px 8px rgba(0,0,0,0.1) !important;
 
226
  }
227
 
228
- .submit-btn:active {
229
- transform: translateY(0) !important;
 
 
 
230
  }
231
 
232
- .slider-container {
233
- background-color: #f8faf8;
234
- padding: 20px;
235
- border-radius: 12px;
236
  margin: 12px 0;
237
- border: 1px solid #e0ede0;
238
- }
239
-
240
- .textbox {
241
- border: 2px solid #e0ede0 !important;
242
- border-radius: 8px !important;
243
- padding: 12px !important;
244
- font-size: 16px !important;
245
- transition: all 0.3s ease !important;
246
- background-color: #ffffff !important;
247
- }
248
-
249
- .textbox:focus {
250
- border-color: #2e7d32 !important;
251
- outline: none !important;
252
- box-shadow: 0 0 0 3px rgba(46,125,50,0.1) !important;
253
- }
254
-
255
- .title {
256
- color: #2c3338 !important;
257
- font-size: 32px !important;
258
- font-weight: 700 !important;
259
- margin-bottom: 16px !important;
260
  }
261
 
262
- .description {
263
- color: #505a62 !important;
264
- font-size: 18px !important;
265
- line-height: 1.6 !important;
266
- margin-bottom: 24px !important;
267
- }
268
-
269
- .additional-inputs {
270
- background-color: #f8faf8;
271
- border: 1px solid #e0ede0;
272
- border-radius: 12px;
273
- padding: 20px;
274
- margin-top: 24px;
275
- }
276
-
277
- .chatbot {
278
- height: 600px !important;
279
- border: 1px solid #e0ede0;
280
- border-radius: 12px;
281
- background-color: #ffffff;
282
  }
283
 
284
- /* Thinking process styling */
285
  .thinking-wrapper {
286
  margin-bottom: 12px;
287
  }
@@ -415,12 +400,12 @@ Follow any privacy or ethical guidelines, and do not disclose personally identif
415
  IMPORTANT: If a question has been asked before in the conversation, acknowledge this and either refer back to the previous answer or provide additional context. Do not simply repeat the same answer verbatim.""",
416
  label="System message"
417
  ),
418
- gr.Slider(minimum=1, maximum=2048, value=1200, step=1, label="Max new tokens"),
419
- gr.Slider(minimum=0.1, maximum=2.0, value=0.4, step=0.1, label="Temperature"),
420
  gr.Slider(
421
  minimum=0.1,
422
  maximum=1.0,
423
- value=0.7,
424
  step=0.05,
425
  label="Top-p (nucleus sampling)",
426
  ),
 
3
  import time
4
  import html
5
  import re
6
+ import traceback
7
 
8
  """
9
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
10
  """
11
  client = InferenceClient("Trinoid/Data_Management")
12
 
13
# Regex fragments for phrases in which the model narrates its own reasoning
# ("thinking" and meta-commentary).  Every match is deleted, case-insensitively,
# from the final response.
THINKING_PATTERNS = [
    r"Okay, so I('m| am) (trying to|going to|attempting to)",
    r"I need to figure out",
    r"I'll start by",
    r"Let me try to",
    r"I'm trying to understand",
    r"First, I know that",
    r"I'll need to look into",
    r"I'm not entirely sure",
    r"I believe this is",
    r"I imagine it involves",
    r"I think I understand",
    r"From what I know",
    r"Let me think about",
    r"From my understanding",
    r"As I understand it",
    r"To answer this question",
    r"To address this",
    r"I'll approach this by",
]

# Compiled once at import time so each call does not re-resolve the patterns.
# They are still applied one after another, in list order.
_THINKING_REGEXES = tuple(re.compile(p, re.IGNORECASE) for p in THINKING_PATTERNS)

# Paragraphs opening with one of these phrases are treated as meta-commentary.
_SUMMARY_PREFIX_RE = re.compile(r"^(In summary|To summarize|In conclusion)", re.IGNORECASE)

# Everything that is neither a word character nor whitespace (i.e. punctuation).
_PUNCTUATION_RE = re.compile(r"[^\w\s]")

# Paragraphs shorter than this are considered to carry no substance.
_MIN_PARAGRAPH_CHARS = 20

# Responses shorter than this are returned untouched.
_MIN_RESPONSE_CHARS = 50

# Word sets with this many words or fewer are never compared for similarity.
_MIN_WORDS_FOR_COMPARISON = 3

# Two paragraphs are "the same idea" when the share of common words, relative
# to the smaller word set, exceeds this ratio.
_SIMILARITY_THRESHOLD = 0.6


def _word_set(paragraph):
    """Return the set of lower-cased words in *paragraph*, punctuation removed."""
    return set(_PUNCTUATION_RE.sub("", paragraph.lower()).split())


def process_final_response(response_text):
    """Clean a finished model response before it is shown to the user.

    Steps, in order:

    1. Delete every match of ``THINKING_PATTERNS``.
    2. Split the text into paragraphs on blank lines.
    3. Drop paragraphs that are shorter than 20 characters or that start with
       "In summary" / "To summarize" / "In conclusion".  Markdown headings
       (paragraphs starting with ``#``) are always kept, because they are
       usually short and would otherwise be stripped from structured answers.
    4. Drop paragraphs whose word overlap with an already kept paragraph
       exceeds 60% of the smaller word set.

    No headings are invented here: the function cannot know what the answer
    is about, so structure is left to the model.

    Args:
        response_text: The complete response text.

    Returns:
        The cleaned text with paragraphs joined by blank lines.  Empty/``None``
        input and responses shorter than 50 characters are returned unchanged.
        If the filters would remove every paragraph, the response is returned
        with only the thinking phrases removed, so the user never receives an
        empty answer.
    """
    if not response_text or len(response_text) < _MIN_RESPONSE_CHARS:
        return response_text

    cleaned = response_text
    for regex in _THINKING_REGEXES:
        cleaned = regex.sub("", cleaned)

    paragraphs = [p.strip() for p in cleaned.split("\n\n") if p.strip()]

    kept = []
    kept_word_sets = []  # parallel to `kept`; avoids re-tokenising per comparison
    for para in paragraphs:
        is_heading = para.startswith("#")
        if not is_heading and (
            len(para) < _MIN_PARAGRAPH_CHARS or _SUMMARY_PREFIX_RE.match(para)
        ):
            continue

        words = _word_set(para)
        is_duplicate = len(words) > _MIN_WORDS_FOR_COMPARISON and any(
            len(seen) > _MIN_WORDS_FOR_COMPARISON
            and len(words & seen) / min(len(words), len(seen)) > _SIMILARITY_THRESHOLD
            for seen in kept_word_sets
        )
        if is_duplicate:
            continue

        kept.append(para)
        kept_word_sets.append(words)

    if not kept:
        # Every paragraph was filtered out; an over-eager filter must not turn
        # a real answer into a blank message.
        return cleaned.strip() or response_text.strip()

    return "\n\n".join(kept)
101
 
102
  def respond(
103
  message,
 
107
  temperature,
108
  top_p,
109
  ):
110
+ # Create a more effective system prompt
111
+ enhanced_system_message = f"""You are an expert in Microsoft 365 services including SharePoint, OneDrive, Teams, and the Microsoft 365 compliance ecosystem.
112
+
113
  {system_message}
114
 
115
+ FORMAT YOUR RESPONSE USING:
116
+ - Clear, direct language
117
+ - Markdown formatting with headings and bullet points
118
+ - Concise, factual information
119
+ - Specific technical details where appropriate
120
+
121
+ CRITICAL RESPONSE REQUIREMENTS:
122
+ 1. Start IMMEDIATELY with the answer - no preamble or self-reference
123
+ 2. NEVER say phrases like "I think", "I believe", "I'm not sure", "I'll try to"
124
+ 3. NEVER reveal your thought process or planning
125
+ 4. Be AUTHORITATIVE and PRECISE
126
+ 5. Present EACH KEY POINT EXACTLY ONCE
127
+ 6. Focus on GOVERNANCE & TECHNICAL details for Microsoft 365
128
+ 7. Keep total response under 2000 characters
129
+ 8. Use 2-3 paragraphs maximum
130
+ 9. Provide concrete governance recommendations
131
+
132
+ If comparing services:
133
+ - List key DIFFERENCES first
134
+ - THEN explain when to use each
135
+ - End with GOVERNANCE recommendations"""
136
 
137
  messages = [{"role": "system", "content": enhanced_system_message}]
138
 
139
+ # Add history and current message
140
  for val in history:
141
  if val[0]:
142
  messages.append({"role": "user", "content": val[0]})
 
145
 
146
  messages.append({"role": "user", "content": message})
147
 
148
+ # Initialize state variables
 
149
  full_response = ""
150
+ thinking_steps = []
151
  start_time = time.time()
152
+ generation_complete = False
153
 
154
  try:
155
+ # Generate response
156
  for message in client.chat_completion(
157
  messages,
158
  max_tokens=max_tokens,
 
161
  top_p=top_p,
162
  ):
163
  token = message.choices[0].delta.content
164
+
165
+ # Skip empty tokens
166
  if not token:
167
+ # Check for completion
168
+ if message.choices[0].finish_reason == "stop":
169
+ generation_complete = True
 
 
170
  continue
171
+
172
+ # Append token to response
173
  full_response += token
174
 
175
+ # Store thinking step snapshot every 250 chars
176
+ if len(full_response) % 250 == 0:
 
 
177
  thinking_steps.append(full_response)
178
 
179
+ # Format and display response
180
+ thinking_html = ""
181
+ if thinking_steps:
182
  thinking_html = '<div class="thinking-wrapper"><details><summary>Show thinking process</summary><div class="thinking-steps">'
183
+ for i, step in enumerate(thinking_steps):
184
  safe_step = html.escape(step)
185
  thinking_html += f'<div class="thinking-step">Step {i+1}: {safe_step}</div>'
186
  thinking_html += '</div></details></div>'
187
+
188
+ # Yield the response
189
+ yield f"{thinking_html}{full_response}"
190
+
191
+ # Check if we need to post-process the response
192
+ processed_response = process_final_response(full_response)
193
 
194
+ # If the processing made significant changes, show both versions
195
+ if len(processed_response) < len(full_response) * 0.8 or len(processed_response) > 100:
 
196
  thinking_html = '<div class="thinking-wrapper"><details><summary>Show original response</summary><div class="thinking-steps">'
197
  thinking_html += f'<div class="thinking-step">{html.escape(full_response)}</div>'
198
  thinking_html += '</div></details></div>'
199
+ yield f"{thinking_html}{processed_response}"
200
 
 
 
201
  except Exception as e:
202
+ error_msg = f"I apologize, but I encountered an error while generating a response. Error details: {str(e)}"
203
+ yield error_msg
 
204
 
205
 
206
  # Custom CSS for Plant Wisdom.AI styling
 
238
  margin-right: 15%;
239
  border: 1px solid #e0ede0;
240
  color: #2c3338;
 
241
  }
242
 
243
+ .message.assistant p {
244
+ margin-bottom: 12px;
 
 
 
 
 
 
 
245
  }
246
 
247
+ .message.assistant h1 {
248
+ font-size: 1.4em;
249
+ margin-top: 0;
250
+ margin-bottom: 16px;
251
+ color: #2e7d32;
252
  }
253
 
254
+ .message.assistant h2 {
255
+ font-size: 1.2em;
256
+ margin-top: 16px;
257
+ margin-bottom: 12px;
258
+ color: #2e7d32;
259
  }
260
 
261
+ .message.assistant ul, .message.assistant ol {
 
 
 
262
  margin: 12px 0;
263
+ padding-left: 24px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  }
265
 
266
+ .message.assistant li {
267
+ margin-bottom: 6px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  }
269
 
 
270
  .thinking-wrapper {
271
  margin-bottom: 12px;
272
  }
 
400
  IMPORTANT: If a question has been asked before in the conversation, acknowledge this and either refer back to the previous answer or provide additional context. Do not simply repeat the same answer verbatim.""",
401
  label="System message"
402
  ),
403
+ gr.Slider(minimum=1, maximum=2048, value=1000, step=1, label="Max new tokens"),
404
+ gr.Slider(minimum=0.1, maximum=2.0, value=0.35, step=0.05, label="Temperature"),
405
  gr.Slider(
406
  minimum=0.1,
407
  maximum=1.0,
408
+ value=0.6,
409
  step=0.05,
410
  label="Top-p (nucleus sampling)",
411
  ),