Spaces:

PlantWisdom
/

Data_Management

Sleeping

App Files Community

Frankie-walsh4 committed on Mar 27, 2025

Commit

2405040

1 Parent(s): b9938e6

Trying to stop the model from repeating itself

Browse files

Files changed (1) hide show

app.py +63 -36

app.py CHANGED Viewed

@@ -19,26 +19,29 @@ ip_request_counters = defaultdict(int)  # Tracks request count per IP
 ip_last_reset = {}  # Tracks when counters were last reset for each IP
 rate_limit_lock = threading.Lock()  # Lock for thread-safe counter access
-# Comprehensive patterns to filter out thinking and meta-commentary
 THINKING_PATTERNS = [
     r"Okay, so I('m| am) (trying to|going to|attempting to)",
     r"I need to figure out",
     r"I'll start by",
     r"Let me try to",
     r"I'm trying to understand",
-    r"First, I know that",
     r"I'll need to look into",
-    r"I'm not entirely sure",
     r"I believe this is",
     r"I imagine it involves",
     r"I think I understand",
-    r"From what I know",
     r"Let me think about",
     r"From my understanding",
     r"As I understand it",
     r"To answer this question",
     r"To address this",
     r"I'll approach this by",
 ]
 def get_client_ip():
@@ -83,10 +86,13 @@ def process_final_response(response_text):
     if len(response_text) < 50:
         return response_text
-    # 1. Remove thinking patterns
     for pattern in THINKING_PATTERNS:
         response_text = re.sub(pattern, "", response_text, flags=re.IGNORECASE)
     # 2. Split into paragraphs
     paragraphs = [p.strip() for p in response_text.split('\n\n') if p.strip()]
@@ -96,9 +102,18 @@ def process_final_response(response_text):
         # Skip too short paragraphs or those that are just meta-commentary
         if len(para) < 20 or re.search(r"^(In summary|To summarize|In conclusion)", para, re.IGNORECASE):
             continue
-        filtered_paragraphs.append(para)
-    # 4. Remove duplicates and similar paragraphs
     unique_paragraphs = []
     for current in filtered_paragraphs:
         # Clean for comparison
@@ -115,31 +130,41 @@ def process_final_response(response_text):
                 overlap = len(words_current.intersection(words_existing))
                 similarity = overlap / min(len(words_current), len(words_existing))
-                if similarity > 0.6:  # 60% threshold for similarity
                     is_duplicate = True
                     break
         if not is_duplicate:
             unique_paragraphs.append(current)
-    # 5. Structure the response if needed
-    if len(unique_paragraphs) > 2 and not any(p.startswith('#') for p in unique_paragraphs):
-        # Try to add headings if response doesn't have them
-        structured_paragraphs = []
-        # Add main heading
-        if len(unique_paragraphs) > 0:
-            structured_paragraphs.append(f"# Key Differences Between OneDrive for Business and SharePoint Online\n")
-            structured_paragraphs.extend(unique_paragraphs[:2])
-            # Add subheadings for remaining content if appropriate
-            if len(unique_paragraphs) > 2:
-                structured_paragraphs.append(f"\n## When to Use Each Service\n")
-                structured_paragraphs.extend(unique_paragraphs[2:])
-        final_text = "\n\n".join(structured_paragraphs)
     else:
-        final_text = "\n\n".join(unique_paragraphs)
     return final_text.strip()
@@ -168,7 +193,7 @@ def respond(
         yield limit_message
         return
-    # Create a more effective system prompt
     enhanced_system_message = f"""You are an expert in Microsoft 365 services including SharePoint, OneDrive, Teams, and the Microsoft 365 compliance ecosystem.
 {system_message}
@@ -180,20 +205,22 @@ FORMAT YOUR RESPONSE USING:
 - Specific technical details where appropriate
 CRITICAL RESPONSE REQUIREMENTS:
-1. Start IMMEDIATELY with the answer - no preamble or self-reference
-2. NEVER say phrases like "I think", "I believe", "I'm not sure", "I'll try to"
-3. NEVER reveal your thought process or planning
 4. Be AUTHORITATIVE and PRECISE
 5. Present EACH KEY POINT EXACTLY ONCE
-6. Focus on GOVERNANCE & TECHNICAL details for Microsoft 365
-7. Keep total response under 2000 characters
 8. Use 2-3 paragraphs maximum
-9. Provide concrete governance recommendations
-If comparing services:
-- List key DIFFERENCES first
-- THEN explain when to use each
-- End with GOVERNANCE recommendations"""
     messages = [{"role": "system", "content": enhanced_system_message}]

 ip_last_reset = {}  # Tracks when counters were last reset for each IP
 rate_limit_lock = threading.Lock()  # Lock for thread-safe counter access
+# Expanded comprehensive patterns to filter out thinking and meta-commentary
 THINKING_PATTERNS = [
     r"Okay, so I('m| am) (trying to|going to|attempting to)",
     r"I need to figure out",
     r"I'll start by",
     r"Let me try to",
     r"I'm trying to understand",
+    r"First, I (know|think) that",
     r"I'll need to look into",
+    r"I'm not entirely (sure|clear)",
     r"I believe this is",
     r"I imagine it involves",
     r"I think I understand",
+    r"From what I (know|remember)",
     r"Let me think about",
     r"From my understanding",
     r"As I understand it",
     r"To answer this question",
     r"To address this",
     r"I'll approach this by",
+    r"I think it's (important|worth) (to note|noting)",
+    r"I (think|believe|wonder|should|also wonder|recall)",
+    r"I also (think|believe|wonder|should|recall)",
 ]
 def get_client_ip():
     if len(response_text) < 50:
         return response_text
+    # 1. Remove thinking patterns more aggressively
     for pattern in THINKING_PATTERNS:
         response_text = re.sub(pattern, "", response_text, flags=re.IGNORECASE)
+    # Remove first person references completely
+    response_text = re.sub(r"\b(I|me|my|mine|myself)\b", "", response_text, flags=re.IGNORECASE)
     # 2. Split into paragraphs
     paragraphs = [p.strip() for p in response_text.split('\n\n') if p.strip()]
         # Skip too short paragraphs or those that are just meta-commentary
         if len(para) < 20 or re.search(r"^(In summary|To summarize|In conclusion)", para, re.IGNORECASE):
             continue
+        # Skip paragraphs with thinking patterns
+        skip = False
+        for pattern in THINKING_PATTERNS:
+            if re.search(pattern, para, re.IGNORECASE):
+                skip = True
+                break
+        if not skip:
+            filtered_paragraphs.append(para)
+    # 4. Remove duplicates and similar paragraphs with stricter threshold
     unique_paragraphs = []
     for current in filtered_paragraphs:
         # Clean for comparison
                 overlap = len(words_current.intersection(words_existing))
                 similarity = overlap / min(len(words_current), len(words_existing))
+                if similarity > 0.5:  # 50% threshold for similarity (stricter)
                     is_duplicate = True
                     break
         if not is_duplicate:
             unique_paragraphs.append(current)
+    # 5. Structure the response based on detected content
+    title = ""
+    if "retention policies" in response_text.lower() and "retention labels" in response_text.lower():
+        title = "# Retention Policies vs. Retention Labels in Microsoft 365"
+    elif "onedrive" in response_text.lower() and "sharepoint" in response_text.lower():
+        title = "# Key Differences Between OneDrive for Business and SharePoint Online"
+    else:
+        # Extract a title from the content
+        first_para = unique_paragraphs[0] if unique_paragraphs else ""
+        first_sentence = first_para.split('.')[0] if first_para else ""
+        if len(first_sentence) > 10:
+            title = f"# {first_sentence}"
+        else:
+            title = "# Microsoft 365 Information Management"
+    # Build structured content with max 2-3 paragraphs
+    final_paras = []
+    if unique_paragraphs:
+        # Limit to just 2-3 most relevant paragraphs
+        final_paras = unique_paragraphs[:min(3, len(unique_paragraphs))]
+        # Add a "Use cases" section if we have 3+ paragraphs
+        if len(unique_paragraphs) > 2:
+            final_text = f"{title}\n\n{final_paras[0]}\n\n{final_paras[1]}\n\n## Key Considerations\n\n{final_paras[2]}"
+        else:
+            final_text = f"{title}\n\n" + "\n\n".join(final_paras)
     else:
+        final_text = f"{title}\n\nNo content available."
     return final_text.strip()
         yield limit_message
         return
+    # Create a more effective system prompt with stronger instructions
     enhanced_system_message = f"""You are an expert in Microsoft 365 services including SharePoint, OneDrive, Teams, and the Microsoft 365 compliance ecosystem.
 {system_message}
 - Specific technical details where appropriate
 CRITICAL RESPONSE REQUIREMENTS:
+1. Start IMMEDIATELY with the answer - NO preamble or self-reference
+2. NEVER use first person (I, me, my) under any circumstances
+3. NEVER reveal your thought process - just state facts
 4. Be AUTHORITATIVE and PRECISE
 5. Present EACH KEY POINT EXACTLY ONCE
+6. Focus on GOVERNANCE & TECHNICAL details
+7. Keep total response under 1500 characters
 8. Use 2-3 paragraphs maximum
+9. Provide concrete recommendations
+10. Write as if from an official Microsoft technical document
+If comparing two services or features:
+- Begin with clear definitions of both
+- Focus on FUNCTIONAL differences
+- List KEY SCENARIOS for each
+- End with GOVERNANCE implications"""
     messages = [{"role": "system", "content": enhanced_system_message}]