Frankie-walsh4 committed on
Commit
2405040
·
1 Parent(s): b9938e6

Trying to stop model repeating itself

Browse files
Files changed (1) hide show
  1. app.py +63 -36
app.py CHANGED
@@ -19,26 +19,29 @@ ip_request_counters = defaultdict(int) # Tracks request count per IP
19
  ip_last_reset = {} # Tracks when counters were last reset for each IP
20
  rate_limit_lock = threading.Lock() # Lock for thread-safe counter access
21
 
22
- # Comprehensive patterns to filter out thinking and meta-commentary
23
  THINKING_PATTERNS = [
24
  r"Okay, so I('m| am) (trying to|going to|attempting to)",
25
  r"I need to figure out",
26
  r"I'll start by",
27
  r"Let me try to",
28
  r"I'm trying to understand",
29
- r"First, I know that",
30
  r"I'll need to look into",
31
- r"I'm not entirely sure",
32
  r"I believe this is",
33
  r"I imagine it involves",
34
  r"I think I understand",
35
- r"From what I know",
36
  r"Let me think about",
37
  r"From my understanding",
38
  r"As I understand it",
39
  r"To answer this question",
40
  r"To address this",
41
  r"I'll approach this by",
 
 
 
42
  ]
43
 
44
  def get_client_ip():
@@ -83,10 +86,13 @@ def process_final_response(response_text):
83
  if len(response_text) < 50:
84
  return response_text
85
 
86
- # 1. Remove thinking patterns
87
  for pattern in THINKING_PATTERNS:
88
  response_text = re.sub(pattern, "", response_text, flags=re.IGNORECASE)
89
 
 
 
 
90
  # 2. Split into paragraphs
91
  paragraphs = [p.strip() for p in response_text.split('\n\n') if p.strip()]
92
 
@@ -96,9 +102,18 @@ def process_final_response(response_text):
96
  # Skip too short paragraphs or those that are just meta-commentary
97
  if len(para) < 20 or re.search(r"^(In summary|To summarize|In conclusion)", para, re.IGNORECASE):
98
  continue
99
- filtered_paragraphs.append(para)
 
 
 
 
 
 
 
 
 
100
 
101
- # 4. Remove duplicates and similar paragraphs
102
  unique_paragraphs = []
103
  for current in filtered_paragraphs:
104
  # Clean for comparison
@@ -115,31 +130,41 @@ def process_final_response(response_text):
115
  overlap = len(words_current.intersection(words_existing))
116
  similarity = overlap / min(len(words_current), len(words_existing))
117
 
118
- if similarity > 0.6: # 60% threshold for similarity
119
  is_duplicate = True
120
  break
121
 
122
  if not is_duplicate:
123
  unique_paragraphs.append(current)
124
 
125
- # 5. Structure the response if needed
126
- if len(unique_paragraphs) > 2 and not any(p.startswith('#') for p in unique_paragraphs):
127
- # Try to add headings if response doesn't have them
128
- structured_paragraphs = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- # Add main heading
131
- if len(unique_paragraphs) > 0:
132
- structured_paragraphs.append(f"# Key Differences Between OneDrive for Business and SharePoint Online\n")
133
- structured_paragraphs.extend(unique_paragraphs[:2])
134
-
135
- # Add subheadings for remaining content if appropriate
136
- if len(unique_paragraphs) > 2:
137
- structured_paragraphs.append(f"\n## When to Use Each Service\n")
138
- structured_paragraphs.extend(unique_paragraphs[2:])
139
-
140
- final_text = "\n\n".join(structured_paragraphs)
141
  else:
142
- final_text = "\n\n".join(unique_paragraphs)
143
 
144
  return final_text.strip()
145
 
@@ -168,7 +193,7 @@ def respond(
168
  yield limit_message
169
  return
170
 
171
- # Create a more effective system prompt
172
  enhanced_system_message = f"""You are an expert in Microsoft 365 services including SharePoint, OneDrive, Teams, and the Microsoft 365 compliance ecosystem.
173
 
174
  {system_message}
@@ -180,20 +205,22 @@ FORMAT YOUR RESPONSE USING:
180
  - Specific technical details where appropriate
181
 
182
  CRITICAL RESPONSE REQUIREMENTS:
183
- 1. Start IMMEDIATELY with the answer - no preamble or self-reference
184
- 2. NEVER say phrases like "I think", "I believe", "I'm not sure", "I'll try to"
185
- 3. NEVER reveal your thought process or planning
186
  4. Be AUTHORITATIVE and PRECISE
187
  5. Present EACH KEY POINT EXACTLY ONCE
188
- 6. Focus on GOVERNANCE & TECHNICAL details for Microsoft 365
189
- 7. Keep total response under 2000 characters
190
  8. Use 2-3 paragraphs maximum
191
- 9. Provide concrete governance recommendations
192
-
193
- If comparing services:
194
- - List key DIFFERENCES first
195
- - THEN explain when to use each
196
- - End with GOVERNANCE recommendations"""
 
 
197
 
198
  messages = [{"role": "system", "content": enhanced_system_message}]
199
 
 
19
  ip_last_reset = {} # Tracks when counters were last reset for each IP
20
  rate_limit_lock = threading.Lock() # Lock for thread-safe counter access
21
 
22
+ # Expanded comprehensive patterns to filter out thinking and meta-commentary
23
  THINKING_PATTERNS = [
24
  r"Okay, so I('m| am) (trying to|going to|attempting to)",
25
  r"I need to figure out",
26
  r"I'll start by",
27
  r"Let me try to",
28
  r"I'm trying to understand",
29
+ r"First, I (know|think) that",
30
  r"I'll need to look into",
31
+ r"I'm not entirely (sure|clear)",
32
  r"I believe this is",
33
  r"I imagine it involves",
34
  r"I think I understand",
35
+ r"From what I (know|remember)",
36
  r"Let me think about",
37
  r"From my understanding",
38
  r"As I understand it",
39
  r"To answer this question",
40
  r"To address this",
41
  r"I'll approach this by",
42
+ r"I think it's (important|worth) (to note|noting)",
43
+ r"I (think|believe|wonder|should|also wonder|recall)",
44
+ r"I also (think|believe|wonder|should|recall)",
45
  ]
46
 
47
  def get_client_ip():
 
86
  if len(response_text) < 50:
87
  return response_text
88
 
89
+ # 1. Remove thinking patterns more aggressively
90
  for pattern in THINKING_PATTERNS:
91
  response_text = re.sub(pattern, "", response_text, flags=re.IGNORECASE)
92
 
93
+ # Remove first person references completely
94
+ response_text = re.sub(r"\b(I|me|my|mine|myself)\b", "", response_text, flags=re.IGNORECASE)
95
+
96
  # 2. Split into paragraphs
97
  paragraphs = [p.strip() for p in response_text.split('\n\n') if p.strip()]
98
 
 
102
  # Skip too short paragraphs or those that are just meta-commentary
103
  if len(para) < 20 or re.search(r"^(In summary|To summarize|In conclusion)", para, re.IGNORECASE):
104
  continue
105
+
106
+ # Skip paragraphs with thinking patterns
107
+ skip = False
108
+ for pattern in THINKING_PATTERNS:
109
+ if re.search(pattern, para, re.IGNORECASE):
110
+ skip = True
111
+ break
112
+
113
+ if not skip:
114
+ filtered_paragraphs.append(para)
115
 
116
+ # 4. Remove duplicates and similar paragraphs with stricter threshold
117
  unique_paragraphs = []
118
  for current in filtered_paragraphs:
119
  # Clean for comparison
 
130
  overlap = len(words_current.intersection(words_existing))
131
  similarity = overlap / min(len(words_current), len(words_existing))
132
 
133
+ if similarity > 0.5: # 50% threshold for similarity (stricter)
134
  is_duplicate = True
135
  break
136
 
137
  if not is_duplicate:
138
  unique_paragraphs.append(current)
139
 
140
+ # 5. Structure the response based on detected content
141
+ title = ""
142
+ if "retention policies" in response_text.lower() and "retention labels" in response_text.lower():
143
+ title = "# Retention Policies vs. Retention Labels in Microsoft 365"
144
+ elif "onedrive" in response_text.lower() and "sharepoint" in response_text.lower():
145
+ title = "# Key Differences Between OneDrive for Business and SharePoint Online"
146
+ else:
147
+ # Extract a title from the content
148
+ first_para = unique_paragraphs[0] if unique_paragraphs else ""
149
+ first_sentence = first_para.split('.')[0] if first_para else ""
150
+ if len(first_sentence) > 10:
151
+ title = f"# {first_sentence}"
152
+ else:
153
+ title = "# Microsoft 365 Information Management"
154
+
155
+ # Build structured content with max 2-3 paragraphs
156
+ final_paras = []
157
+ if unique_paragraphs:
158
+ # Limit to just 2-3 most relevant paragraphs
159
+ final_paras = unique_paragraphs[:min(3, len(unique_paragraphs))]
160
 
161
+ # Add a "Key Considerations" heading before the third paragraph if we have 3+ paragraphs
162
+ if len(unique_paragraphs) > 2:
163
+ final_text = f"{title}\n\n{final_paras[0]}\n\n{final_paras[1]}\n\n## Key Considerations\n\n{final_paras[2]}"
164
+ else:
165
+ final_text = f"{title}\n\n" + "\n\n".join(final_paras)
 
 
 
 
 
 
166
  else:
167
+ final_text = f"{title}\n\nNo content available."
168
 
169
  return final_text.strip()
170
 
 
193
  yield limit_message
194
  return
195
 
196
+ # Create a more effective system prompt with stronger instructions
197
  enhanced_system_message = f"""You are an expert in Microsoft 365 services including SharePoint, OneDrive, Teams, and the Microsoft 365 compliance ecosystem.
198
 
199
  {system_message}
 
205
  - Specific technical details where appropriate
206
 
207
  CRITICAL RESPONSE REQUIREMENTS:
208
+ 1. Start IMMEDIATELY with the answer - NO preamble or self-reference
209
+ 2. NEVER use first person (I, me, my) under any circumstances
210
+ 3. NEVER reveal your thought process - just state facts
211
  4. Be AUTHORITATIVE and PRECISE
212
  5. Present EACH KEY POINT EXACTLY ONCE
213
+ 6. Focus on GOVERNANCE & TECHNICAL details
214
+ 7. Keep total response under 1500 characters
215
  8. Use 2-3 paragraphs maximum
216
+ 9. Provide concrete recommendations
217
+ 10. Write as if from an official Microsoft technical document
218
+
219
+ If comparing two services or features:
220
+ - Begin with clear definitions of both
221
+ - Focus on FUNCTIONAL differences
222
+ - List KEY SCENARIOS for each
223
+ - End with GOVERNANCE implications"""
224
 
225
  messages = [{"role": "system", "content": enhanced_system_message}]
226