Spaces:

PlantWisdom
/

Data_Management

Sleeping

App Files Community

Frankie-walsh4 committed on Mar 26, 2025

Commit

bca9833

1 Parent(s): a03b5fc

change for AI thinking

Browse files

Files changed (1) hide show

app.py +51 -10

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ For more information on `huggingface_hub` Inference API support, please check th
 client = InferenceClient("Trinoid/Data_Management")
 def clean_response(text):
-    """Clean up response by removing meta-text and thinking artifacts"""
     # Remove thinking phrases
     thinking_patterns = [
         r"I need to figure out",
@@ -22,17 +22,55 @@ def clean_response(text):
         r"I'm not entirely sure",
         r"I believe this is",
         r"I imagine it involves",
     ]
     for pattern in thinking_patterns:
         text = re.sub(pattern, "", text, flags=re.IGNORECASE)
-    # Remove repeating paragraphs
     paragraphs = text.split('\n\n')
-    unique_paragraphs = []
     for p in paragraphs:
-        if p and p not in unique_paragraphs and len(p.strip()) > 20:
-            unique_paragraphs.append(p)
     return '\n\n'.join(unique_paragraphs)
@@ -44,18 +82,21 @@ def respond(
     temperature,
     top_p,
 ):
-    # Create a more structured system prompt
     enhanced_system_message = f"""
 {system_message}
-IMPORTANT INSTRUCTIONS FOR YOUR RESPONSES:
 1. PROVIDE DIRECT, AUTHORITATIVE, AND COMPLETE ANSWERS ABOUT MICROSOFT 365 AND DATA MANAGEMENT.
 2. DO NOT USE PHRASES LIKE "I think", "I believe", "I'm not sure", "I'll try to", "First, I need to".
 3. DO NOT INCLUDE YOUR THINKING PROCESS IN RESPONSES.
 4. USE CLEAR STRUCTURE WITH HEADINGS AND BULLET POINTS WHERE APPROPRIATE.
-5. BE CONCISE AND FOCUSED - AVOID UNNECESSARY REPETITION.
-6. WHEN ANSWERING QUESTIONS ABOUT DOCUMENT MANAGEMENT, PROVIDE SPECIFIC DETAILS ABOUT THE ACTUAL TOOLS AND FEATURES.
-7. ANSWER AS A MICROSOFT 365 EXPERT WITH AUTHORITATIVE KNOWLEDGE.
 """
     messages = [{"role": "system", "content": enhanced_system_message}]

 client = InferenceClient("Trinoid/Data_Management")
 def clean_response(text):
+    """Clean up response by removing meta-text, thinking artifacts, and repetitive content"""
     # Remove thinking phrases
     thinking_patterns = [
         r"I need to figure out",
         r"I'm not entirely sure",
         r"I believe this is",
         r"I imagine it involves",
+        r"Okay, so I need to",
+        r"From what I know,",
+        r"One of the main reasons to",
+        r"Another reason to",
     ]
     for pattern in thinking_patterns:
         text = re.sub(pattern, "", text, flags=re.IGNORECASE)
+    # Split into paragraphs for deduplication
     paragraphs = text.split('\n\n')
+    # Only keep meaningful paragraphs
+    filtered_paragraphs = []
     for p in paragraphs:
+        if p and len(p.strip()) > 20:  # Only include non-empty paragraphs with substance
+            filtered_paragraphs.append(p)
+    # Remove similar paragraphs (not just exact duplicates)
+    unique_paragraphs = []
+    for i, current_para in enumerate(filtered_paragraphs):
+        is_duplicate = False
+        # Convert to lowercase and remove punctuation for comparison
+        clean_current = re.sub(r'[^\w\s]', '', current_para.lower())
+        for prev_para in unique_paragraphs:
+            # Clean previous paragraph too
+            clean_prev = re.sub(r'[^\w\s]', '', prev_para.lower())
+            # Check for similarity using word overlap
+            # If more than 50% of the smaller paragraph's unique words also appear in the other, consider it similar
+            words_current = set(clean_current.split())
+            words_prev = set(clean_prev.split())
+            if len(words_current) > 0 and len(words_prev) > 0:
+                common_words = words_current.intersection(words_prev)
+                similarity = len(common_words) / min(len(words_current), len(words_prev))
+                if similarity > 0.5:  # If more than 50% similar, it's a duplicate concept
+                    is_duplicate = True
+                    break
+        if not is_duplicate:
+            unique_paragraphs.append(current_para)
+    # Append a notice if more than half of the meaningful paragraphs were removed as duplicates
+    if len(unique_paragraphs) < len(filtered_paragraphs) / 2:
+        unique_paragraphs.append("Note: Some repetitive content has been removed from this response for clarity.")
     return '\n\n'.join(unique_paragraphs)
     temperature,
     top_p,
 ):
+    # Create a more structured system prompt with strict instructions about repetition
     enhanced_system_message = f"""
 {system_message}
+CRITICAL INSTRUCTIONS FOR YOUR RESPONSES:
 1. PROVIDE DIRECT, AUTHORITATIVE, AND COMPLETE ANSWERS ABOUT MICROSOFT 365 AND DATA MANAGEMENT.
 2. DO NOT USE PHRASES LIKE "I think", "I believe", "I'm not sure", "I'll try to", "First, I need to".
 3. DO NOT INCLUDE YOUR THINKING PROCESS IN RESPONSES.
 4. USE CLEAR STRUCTURE WITH HEADINGS AND BULLET POINTS WHERE APPROPRIATE.
+5. NEVER REPEAT THE SAME INFORMATION IN DIFFERENT WORDS.
+6. MENTION EACH CONCEPT EXACTLY ONCE - DO NOT ELABORATE ON THE SAME IDEA MULTIPLE TIMES.
+7. WHEN ANSWERING QUESTIONS ABOUT DOCUMENT MANAGEMENT, PROVIDE SPECIFIC DETAILS ABOUT THE ACTUAL TOOLS AND FEATURES.
+8. LIMIT YOUR RESPONSE LENGTH TO WHAT IS NECESSARY - BE CONCISE.
+9. WHEN GIVING EXAMPLES, PROVIDE ONE CLEAR EXAMPLE RATHER THAN MULTIPLE SIMILAR ONES.
+10. ANSWER AS A MICROSOFT 365 EXPERT WITH AUTHORITATIVE KNOWLEDGE.
 """
     messages = [{"role": "system", "content": enhanced_system_message}]