Spaces:
Sleeping
Sleeping
Commit ·
bca9833
1
Parent(s): a03b5fc
change for AI thinking
Browse files
app.py
CHANGED
|
@@ -10,7 +10,7 @@ For more information on `huggingface_hub` Inference API support, please check th
|
|
| 10 |
client = InferenceClient("Trinoid/Data_Management")
|
| 11 |
|
| 12 |
def clean_response(text):
|
| 13 |
-
"""Clean up response by removing meta-text
|
| 14 |
# Remove thinking phrases
|
| 15 |
thinking_patterns = [
|
| 16 |
r"I need to figure out",
|
|
@@ -22,17 +22,55 @@ def clean_response(text):
|
|
| 22 |
r"I'm not entirely sure",
|
| 23 |
r"I believe this is",
|
| 24 |
r"I imagine it involves",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
]
|
| 26 |
|
| 27 |
for pattern in thinking_patterns:
|
| 28 |
text = re.sub(pattern, "", text, flags=re.IGNORECASE)
|
| 29 |
|
| 30 |
-
#
|
| 31 |
paragraphs = text.split('\n\n')
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
for p in paragraphs:
|
| 34 |
-
if p and
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
return '\n\n'.join(unique_paragraphs)
|
| 38 |
|
|
@@ -44,18 +82,21 @@ def respond(
|
|
| 44 |
temperature,
|
| 45 |
top_p,
|
| 46 |
):
|
| 47 |
-
# Create a more structured system prompt
|
| 48 |
enhanced_system_message = f"""
|
| 49 |
{system_message}
|
| 50 |
|
| 51 |
-
|
| 52 |
1. PROVIDE DIRECT, AUTHORITATIVE, AND COMPLETE ANSWERS ABOUT MICROSOFT 365 AND DATA MANAGEMENT.
|
| 53 |
2. DO NOT USE PHRASES LIKE "I think", "I believe", "I'm not sure", "I'll try to", "First, I need to".
|
| 54 |
3. DO NOT INCLUDE YOUR THINKING PROCESS IN RESPONSES.
|
| 55 |
4. USE CLEAR STRUCTURE WITH HEADINGS AND BULLET POINTS WHERE APPROPRIATE.
|
| 56 |
-
5.
|
| 57 |
-
6.
|
| 58 |
-
7.
|
|
|
|
|
|
|
|
|
|
| 59 |
"""
|
| 60 |
|
| 61 |
messages = [{"role": "system", "content": enhanced_system_message}]
|
|
|
|
| 10 |
client = InferenceClient("Trinoid/Data_Management")
|
| 11 |
|
| 12 |
def clean_response(text):
|
| 13 |
+
"""Clean up response by removing meta-text, thinking artifacts, and repetitive content"""
|
| 14 |
# Remove thinking phrases
|
| 15 |
thinking_patterns = [
|
| 16 |
r"I need to figure out",
|
|
|
|
| 22 |
r"I'm not entirely sure",
|
| 23 |
r"I believe this is",
|
| 24 |
r"I imagine it involves",
|
| 25 |
+
r"Okay, so I need to",
|
| 26 |
+
r"From what I know,",
|
| 27 |
+
r"One of the main reasons to",
|
| 28 |
+
r"Another reason to",
|
| 29 |
]
|
| 30 |
|
| 31 |
for pattern in thinking_patterns:
|
| 32 |
text = re.sub(pattern, "", text, flags=re.IGNORECASE)
|
| 33 |
|
| 34 |
+
# Split into paragraphs for deduplication
|
| 35 |
paragraphs = text.split('\n\n')
|
| 36 |
+
|
| 37 |
+
# Only keep meaningful paragraphs
|
| 38 |
+
filtered_paragraphs = []
|
| 39 |
for p in paragraphs:
|
| 40 |
+
if p and len(p.strip()) > 20: # Only include non-empty paragraphs with substance
|
| 41 |
+
filtered_paragraphs.append(p)
|
| 42 |
+
|
| 43 |
+
# Remove similar paragraphs (not just exact duplicates)
|
| 44 |
+
unique_paragraphs = []
|
| 45 |
+
for i, current_para in enumerate(filtered_paragraphs):
|
| 46 |
+
is_duplicate = False
|
| 47 |
+
|
| 48 |
+
# Convert to lowercase and remove punctuation for comparison
|
| 49 |
+
clean_current = re.sub(r'[^\w\s]', '', current_para.lower())
|
| 50 |
+
|
| 51 |
+
for prev_para in unique_paragraphs:
|
| 52 |
+
# Clean previous paragraph too
|
| 53 |
+
clean_prev = re.sub(r'[^\w\s]', '', prev_para.lower())
|
| 54 |
+
|
| 55 |
+
# Check for similarity using word overlap
|
| 56 |
+
# If more than 50% of words match, consider it similar
|
| 57 |
+
words_current = set(clean_current.split())
|
| 58 |
+
words_prev = set(clean_prev.split())
|
| 59 |
+
|
| 60 |
+
if len(words_current) > 0 and len(words_prev) > 0:
|
| 61 |
+
common_words = words_current.intersection(words_prev)
|
| 62 |
+
similarity = len(common_words) / min(len(words_current), len(words_prev))
|
| 63 |
+
|
| 64 |
+
if similarity > 0.5: # If more than 50% similar, it's a duplicate concept
|
| 65 |
+
is_duplicate = True
|
| 66 |
+
break
|
| 67 |
+
|
| 68 |
+
if not is_duplicate:
|
| 69 |
+
unique_paragraphs.append(current_para)
|
| 70 |
+
|
| 71 |
+
# Append a notice if more than half of the paragraphs were removed as duplicates
|
| 72 |
+
if len(unique_paragraphs) < len(filtered_paragraphs) / 2:
|
| 73 |
+
unique_paragraphs.append("Note: Some repetitive content has been removed from this response for clarity.")
|
| 74 |
|
| 75 |
return '\n\n'.join(unique_paragraphs)
|
| 76 |
|
|
|
|
| 82 |
temperature,
|
| 83 |
top_p,
|
| 84 |
):
|
| 85 |
+
# Create a more structured system prompt with strict instructions about repetition
|
| 86 |
enhanced_system_message = f"""
|
| 87 |
{system_message}
|
| 88 |
|
| 89 |
+
CRITICAL INSTRUCTIONS FOR YOUR RESPONSES:
|
| 90 |
1. PROVIDE DIRECT, AUTHORITATIVE, AND COMPLETE ANSWERS ABOUT MICROSOFT 365 AND DATA MANAGEMENT.
|
| 91 |
2. DO NOT USE PHRASES LIKE "I think", "I believe", "I'm not sure", "I'll try to", "First, I need to".
|
| 92 |
3. DO NOT INCLUDE YOUR THINKING PROCESS IN RESPONSES.
|
| 93 |
4. USE CLEAR STRUCTURE WITH HEADINGS AND BULLET POINTS WHERE APPROPRIATE.
|
| 94 |
+
5. NEVER REPEAT THE SAME INFORMATION IN DIFFERENT WORDS.
|
| 95 |
+
6. MENTION EACH CONCEPT EXACTLY ONCE - DO NOT ELABORATE ON THE SAME IDEA MULTIPLE TIMES.
|
| 96 |
+
7. WHEN ANSWERING QUESTIONS ABOUT DOCUMENT MANAGEMENT, PROVIDE SPECIFIC DETAILS ABOUT THE ACTUAL TOOLS AND FEATURES.
|
| 97 |
+
8. LIMIT YOUR RESPONSE LENGTH TO WHAT IS NECESSARY - BE CONCISE.
|
| 98 |
+
9. WHEN GIVING EXAMPLES, PROVIDE ONE CLEAR EXAMPLE RATHER THAN MULTIPLE SIMILAR ONES.
|
| 99 |
+
10. ANSWER AS A MICROSOFT 365 EXPERT WITH AUTHORITATIVE KNOWLEDGE.
|
| 100 |
"""
|
| 101 |
|
| 102 |
messages = [{"role": "system", "content": enhanced_system_message}]
|