Frankie-walsh4 committed on
Commit
3908e5f
·
1 Parent(s): 9abd2f2

change for AI thinking

Browse files
Files changed (1) hide show
  1. app.py +56 -22
app.py CHANGED
@@ -17,7 +17,19 @@ def respond(
17
  temperature,
18
  top_p,
19
  ):
20
- messages = [{"role": "system", "content": system_message}]
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  for val in history:
23
  if val[0]:
@@ -30,6 +42,8 @@ def respond(
30
  thinking_steps = []
31
  full_response = ""
32
  start_time = time.time()
 
 
33
 
34
  # Use chat completion instead of text generation
35
  for message in client.chat_completion(
@@ -40,28 +54,48 @@ def respond(
40
  top_p=top_p,
41
  ):
42
  token = message.choices[0].delta.content
43
- if token:
44
- full_response += token
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- # Save thinking steps every 2 seconds or every 100 characters
47
- current_time = time.time()
48
- if current_time - start_time > 2 or len(full_response) % 100 == 0:
49
- start_time = current_time
50
- thinking_steps.append(full_response)
51
-
52
- # Format with thinking history as HTML
53
- if thinking_steps:
54
- thinking_html = '<div class="thinking-wrapper"><details><summary>Show thinking process</summary><div class="thinking-steps">'
55
- for i, step in enumerate(thinking_steps):
56
- # Escape HTML to prevent rendering issues
57
- safe_step = html.escape(step)
58
- thinking_html += f'<div class="thinking-step">Step {i+1}: {safe_step}</div>'
59
- thinking_html += '</div></details></div>'
60
-
61
- # Yield both thinking and current response
62
- yield f"{thinking_html}{full_response}"
63
- else:
64
- yield full_response
65
 
66
 
67
  # Custom CSS for Plant Wisdom.AI styling
 
17
  temperature,
18
  top_p,
19
  ):
20
+ # Add a special instruction to the system message to prevent thinking out loud and repetition
21
+ enhanced_system_message = system_message + """
22
+
23
+ IMPORTANT INSTRUCTION: You must provide direct, authoritative answers based on your knowledge.
24
+ DO NOT reveal your internal thinking process, planning, or self-questioning.
25
+ DO NOT say phrases like "I need to figure out" or "I'll start by researching".
26
+ DO NOT describe your approach to answering the question.
27
+ DO NOT repeat yourself or get stuck in loops of similar content.
28
+ Keep your response focused, structured, and concise.
29
+ INSTEAD, provide concise, structured, and factual information directly.
30
+ Answer as an authoritative expert with deep knowledge of Microsoft 365 services."""
31
+
32
+ messages = [{"role": "system", "content": enhanced_system_message}]
33
 
34
  for val in history:
35
  if val[0]:
 
42
  thinking_steps = []
43
  full_response = ""
44
  start_time = time.time()
45
+ repetition_count = 0
46
+ last_segment = ""
47
 
48
  # Use chat completion instead of text generation
49
  for message in client.chat_completion(
 
54
  top_p=top_p,
55
  ):
56
  token = message.choices[0].delta.content
57
+ if not token:
58
+ continue
59
+
60
+ # Check for repetition by comparing with previous chunk
61
+ if len(full_response) > 100:
62
+ last_100_chars = full_response[-100:]
63
+ # If we find the same chunk repeating
64
+ if last_100_chars in full_response[:-100] and last_100_chars.strip():
65
+ repetition_count += 1
66
+ # If we detect significant repetition, abort this generation
67
+ if repetition_count > 2:
68
+ # Trim off the repetitive part
69
+ repetition_index = full_response.rfind(last_100_chars, 0, -100)
70
+ if repetition_index > 0:
71
+ full_response = full_response[:repetition_index] + "\n\n[Response trimmed to avoid repetition]"
72
+ break
73
+
74
+ full_response += token
75
+
76
+ # Save thinking steps at intervals
77
+ current_time = time.time()
78
+ if current_time - start_time > 2 or len(full_response) % 150 == 0:
79
+ start_time = current_time
80
+ thinking_steps.append(full_response)
81
+
82
+ # Store last segment for repetition detection
83
+ if len(full_response) % 50 == 0:
84
+ last_segment = full_response[-50:]
85
+
86
+ # Format with thinking history as HTML
87
+ if thinking_steps and len(thinking_steps) > 1: # Only show if we have multiple steps
88
+ thinking_html = '<div class="thinking-wrapper"><details><summary>Show thinking process</summary><div class="thinking-steps">'
89
+ for i, step in enumerate(thinking_steps[:-1]): # Exclude the current step
90
+ # Escape HTML to prevent rendering issues
91
+ safe_step = html.escape(step)
92
+ thinking_html += f'<div class="thinking-step">Step {i+1}: {safe_step}</div>'
93
+ thinking_html += '</div></details></div>'
94
 
95
+ # Yield both thinking and current response
96
+ yield f"{thinking_html}{full_response}"
97
+ else:
98
+ yield full_response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
 
101
  # Custom CSS for Plant Wisdom.AI styling