Spaces:

PlantWisdom
/

Data_Management

Sleeping

App Files Community

Frankie-walsh4 committed on Mar 26, 2025

Commit

3908e5f

1 Parent(s): 9abd2f2

change for AI thinking

Browse files

Files changed (1) hide show

app.py +56 -22

app.py CHANGED Viewed

@@ -17,7 +17,19 @@ def respond(
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
     for val in history:
         if val[0]:
@@ -30,6 +42,8 @@ def respond(
     thinking_steps = []
     full_response = ""
     start_time = time.time()
     # Use chat completion instead of text generation
     for message in client.chat_completion(
@@ -40,28 +54,48 @@ def respond(
         top_p=top_p,
     ):
         token = message.choices[0].delta.content
-        if token:
-            full_response += token
-            # Save thinking steps every 2 seconds or every 100 characters
-            current_time = time.time()
-            if current_time - start_time > 2 or len(full_response) % 100 == 0:
-                start_time = current_time
-                thinking_steps.append(full_response)
-            # Format with thinking history as HTML
-            if thinking_steps:
-                thinking_html = '<div class="thinking-wrapper"><details><summary>Show thinking process</summary><div class="thinking-steps">'
-                for i, step in enumerate(thinking_steps):
-                    # Escape HTML to prevent rendering issues
-                    safe_step = html.escape(step)
-                    thinking_html += f'<div class="thinking-step">Step {i+1}: {safe_step}</div>'
-                thinking_html += '</div></details></div>'
-                # Yield both thinking and current response
-                yield f"{thinking_html}{full_response}"
-            else:
-                yield full_response
 # Custom CSS for Plant Wisdom.AI styling

     temperature,
     top_p,
 ):
+    # Add a special instruction to the system message to prevent thinking out loud and repetition
+    enhanced_system_message = system_message + """
+IMPORTANT INSTRUCTION: You must provide direct, authoritative answers based on your knowledge.
+DO NOT reveal your internal thinking process, planning, or self-questioning.
+DO NOT say phrases like "I need to figure out" or "I'll start by researching".
+DO NOT describe your approach to answering the question.
+DO NOT repeat yourself or get stuck in loops of similar content.
+Keep your response focused, structured, and concise.
+INSTEAD, provide concise, structured, and factual information directly.
+Answer as an authoritative expert with deep knowledge of Microsoft 365 services."""
+    messages = [{"role": "system", "content": enhanced_system_message}]
     for val in history:
         if val[0]:
     thinking_steps = []
     full_response = ""
     start_time = time.time()
+    repetition_count = 0
+    last_segment = ""
     # Use chat completion instead of text generation
     for message in client.chat_completion(
         top_p=top_p,
     ):
         token = message.choices[0].delta.content
+        if not token:
+            continue
+        # Check for repetition by comparing with previous chunk
+        if len(full_response) > 100:
+            last_100_chars = full_response[-100:]
+            # If we find the same chunk repeating
+            if last_100_chars in full_response[:-100] and last_100_chars.strip():
+                repetition_count += 1
+                # If we detect significant repetition, abort this generation
+                if repetition_count > 2:
+                    # Trim off the repetitive part
+                    repetition_index = full_response.rfind(last_100_chars, 0, -100)
+                    if repetition_index > 0:
+                        full_response = full_response[:repetition_index] + "\n\n[Response trimmed to avoid repetition]"
+                        break
+        full_response += token
+        # Save thinking steps at intervals
+        current_time = time.time()
+        if current_time - start_time > 2 or len(full_response) % 150 == 0:
+            start_time = current_time
+            thinking_steps.append(full_response)
+        # Store last segment for repetition detection
+        if len(full_response) % 50 == 0:
+            last_segment = full_response[-50:]
+        # Format with thinking history as HTML
+        if thinking_steps and len(thinking_steps) > 1:  # Only show if we have multiple steps
+            thinking_html = '<div class="thinking-wrapper"><details><summary>Show thinking process</summary><div class="thinking-steps">'
+            for i, step in enumerate(thinking_steps[:-1]):  # Exclude the current step
+                # Escape HTML to prevent rendering issues
+                safe_step = html.escape(step)
+                thinking_html += f'<div class="thinking-step">Step {i+1}: {safe_step}</div>'
+            thinking_html += '</div></details></div>'
+            # Yield both thinking and current response
+            yield f"{thinking_html}{full_response}"
+        else:
+            yield full_response
 # Custom CSS for Plant Wisdom.AI styling