Frankie-walsh4 committed on
Commit
a03b5fc
·
1 Parent(s): 3908e5f

Change response handling to filter out AI "thinking out loud" output

Browse files
Files changed (1) hide show
  1. app.py +91 -60
app.py CHANGED
@@ -2,12 +2,39 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import time
4
  import html
 
5
 
6
  """
7
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
8
  """
9
  client = InferenceClient("Trinoid/Data_Management")
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def respond(
13
  message,
@@ -17,17 +44,19 @@ def respond(
17
  temperature,
18
  top_p,
19
  ):
20
- # Add a special instruction to the system message to prevent thinking out loud and repetition
21
- enhanced_system_message = system_message + """
22
-
23
- IMPORTANT INSTRUCTION: You must provide direct, authoritative answers based on your knowledge.
24
- DO NOT reveal your internal thinking process, planning, or self-questioning.
25
- DO NOT say phrases like "I need to figure out" or "I'll start by researching".
26
- DO NOT describe your approach to answering the question.
27
- DO NOT repeat yourself or get stuck in loops of similar content.
28
- Keep your response focused, structured, and concise.
29
- INSTEAD, provide concise, structured, and factual information directly.
30
- Answer as an authoritative expert with deep knowledge of Microsoft 365 services."""
 
 
31
 
32
  messages = [{"role": "system", "content": enhanced_system_message}]
33
 
@@ -39,63 +68,65 @@ Answer as an authoritative expert with deep knowledge of Microsoft 365 services.
39
 
40
  messages.append({"role": "user", "content": message})
41
 
 
42
  thinking_steps = []
43
  full_response = ""
44
  start_time = time.time()
45
- repetition_count = 0
46
- last_segment = ""
47
 
48
- # Use chat completion instead of text generation
49
- for message in client.chat_completion(
50
- messages,
51
- max_tokens=max_tokens,
52
- stream=True,
53
- temperature=temperature,
54
- top_p=top_p,
55
- ):
56
- token = message.choices[0].delta.content
57
- if not token:
58
- continue
59
-
60
- # Check for repetition by comparing with previous chunk
61
- if len(full_response) > 100:
62
- last_100_chars = full_response[-100:]
63
- # If we find the same chunk repeating
64
- if last_100_chars in full_response[:-100] and last_100_chars.strip():
65
- repetition_count += 1
66
- # If we detect significant repetition, abort this generation
67
- if repetition_count > 2:
68
- # Trim off the repetitive part
69
- repetition_index = full_response.rfind(last_100_chars, 0, -100)
70
- if repetition_index > 0:
71
- full_response = full_response[:repetition_index] + "\n\n[Response trimmed to avoid repetition]"
72
  break
73
-
74
- full_response += token
75
-
76
- # Save thinking steps at intervals
77
- current_time = time.time()
78
- if current_time - start_time > 2 or len(full_response) % 150 == 0:
79
- start_time = current_time
80
- thinking_steps.append(full_response)
81
-
82
- # Store last segment for repetition detection
83
- if len(full_response) % 50 == 0:
84
- last_segment = full_response[-50:]
85
 
86
- # Format with thinking history as HTML
87
- if thinking_steps and len(thinking_steps) > 1: # Only show if we have multiple steps
88
- thinking_html = '<div class="thinking-wrapper"><details><summary>Show thinking process</summary><div class="thinking-steps">'
89
- for i, step in enumerate(thinking_steps[:-1]): # Exclude the current step
90
- # Escape HTML to prevent rendering issues
91
- safe_step = html.escape(step)
92
- thinking_html += f'<div class="thinking-step">Step {i+1}: {safe_step}</div>'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  thinking_html += '</div></details></div>'
94
 
95
- # Yield both thinking and current response
96
- yield f"{thinking_html}{full_response}"
97
- else:
98
- yield full_response
 
 
99
 
100
 
101
  # Custom CSS for Plant Wisdom.AI styling
 
2
  from huggingface_hub import InferenceClient
3
  import time
4
  import html
5
+ import re
6
 
7
  """
8
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
9
  """
10
  client = InferenceClient("Trinoid/Data_Management")
11
 
12
def clean_response(text):
    """Clean up a model response by removing thinking artifacts and repeats.

    Args:
        text: Raw response text produced by the model.

    Returns:
        The cleaned text: known "thinking out loud" phrases are deleted
        (case-insensitively, surrounding text kept), then paragraphs are
        deduplicated in order, dropping empty, repeated, or very short
        (<= 20 chars after stripping) fragments.
    """
    # Meta-commentary phrases the model sometimes leaks despite the
    # system prompt instructions; each match is removed verbatim.
    thinking_patterns = [
        r"I need to figure out",
        r"I'll start by",
        r"Let me try to",
        r"I'm trying to understand",
        r"First, I know that",
        r"I'll need to look into",
        r"I'm not entirely sure",
        r"I believe this is",
        r"I imagine it involves",
    ]

    for pattern in thinking_patterns:
        text = re.sub(pattern, "", text, flags=re.IGNORECASE)

    # Deduplicate paragraphs while preserving their original order.
    # A companion set makes the membership test O(1) per paragraph
    # instead of re-scanning the accumulated list (O(n^2) overall).
    paragraphs = text.split('\n\n')
    unique_paragraphs = []
    seen = set()
    for p in paragraphs:
        # Skip empties, exact repeats, and fragments too short to be a
        # real paragraph. NOTE(review): this also drops short headings
        # or one-liners — confirm that is intended.
        if p and p not in seen and len(p.strip()) > 20:
            unique_paragraphs.append(p)
            seen.add(p)

    return '\n\n'.join(unique_paragraphs)
38
 
39
  def respond(
40
  message,
 
44
  temperature,
45
  top_p,
46
  ):
47
+ # Create a more structured system prompt
48
+ enhanced_system_message = f"""
49
+ {system_message}
50
+
51
+ IMPORTANT INSTRUCTIONS FOR YOUR RESPONSES:
52
+ 1. PROVIDE DIRECT, AUTHORITATIVE, AND COMPLETE ANSWERS ABOUT MICROSOFT 365 AND DATA MANAGEMENT.
53
+ 2. DO NOT USE PHRASES LIKE "I think", "I believe", "I'm not sure", "I'll try to", "First, I need to".
54
+ 3. DO NOT INCLUDE YOUR THINKING PROCESS IN RESPONSES.
55
+ 4. USE CLEAR STRUCTURE WITH HEADINGS AND BULLET POINTS WHERE APPROPRIATE.
56
+ 5. BE CONCISE AND FOCUSED - AVOID UNNECESSARY REPETITION.
57
+ 6. WHEN ANSWERING QUESTIONS ABOUT DOCUMENT MANAGEMENT, PROVIDE SPECIFIC DETAILS ABOUT THE ACTUAL TOOLS AND FEATURES.
58
+ 7. ANSWER AS A MICROSOFT 365 EXPERT WITH AUTHORITATIVE KNOWLEDGE.
59
+ """
60
 
61
  messages = [{"role": "system", "content": enhanced_system_message}]
62
 
 
68
 
69
  messages.append({"role": "user", "content": message})
70
 
71
+ # Track generation state
72
  thinking_steps = []
73
  full_response = ""
74
  start_time = time.time()
75
+ last_token_time = time.time()
 
76
 
77
+ try:
78
+ # Use chat completion
79
+ for message in client.chat_completion(
80
+ messages,
81
+ max_tokens=max_tokens,
82
+ stream=True,
83
+ temperature=temperature,
84
+ top_p=top_p,
85
+ ):
86
+ token = message.choices[0].delta.content
87
+ if not token:
88
+ # Check for long pause between tokens (potential stall)
89
+ current_time = time.time()
90
+ if current_time - last_token_time > 5: # 5 second timeout
91
+ if full_response:
 
 
 
 
 
 
 
 
 
92
  break
93
+ continue
94
+
95
+ last_token_time = time.time()
96
+ full_response += token
 
 
 
 
 
 
 
 
97
 
98
+ # Save thinking steps for display only
99
+ current_time = time.time()
100
+ if current_time - start_time > 2 or len(full_response) % 200 == 0:
101
+ start_time = current_time
102
+ thinking_steps.append(full_response)
103
+
104
+ # Format with thinking history as HTML
105
+ if thinking_steps and len(thinking_steps) > 1:
106
+ thinking_html = '<div class="thinking-wrapper"><details><summary>Show thinking process</summary><div class="thinking-steps">'
107
+ for i, step in enumerate(thinking_steps[:-1]):
108
+ safe_step = html.escape(step)
109
+ thinking_html += f'<div class="thinking-step">Step {i+1}: {safe_step}</div>'
110
+ thinking_html += '</div></details></div>'
111
+
112
+ # Always yield the full current response (no cleaning during generation)
113
+ yield f"{thinking_html}{full_response}"
114
+ else:
115
+ yield full_response
116
+
117
+ # Clean up the final response to remove thinking artifacts
118
+ if "I'm trying to understand" in full_response or "I need to figure out" in full_response:
119
+ cleaned_response = clean_response(full_response)
120
+ thinking_html = '<div class="thinking-wrapper"><details><summary>Show original response</summary><div class="thinking-steps">'
121
+ thinking_html += f'<div class="thinking-step">{html.escape(full_response)}</div>'
122
  thinking_html += '</div></details></div>'
123
 
124
+ yield f"{thinking_html}{cleaned_response}"
125
+
126
+ except Exception as e:
127
+ # Handle exceptions gracefully
128
+ error_message = f"I apologize, but I encountered an error while generating a response. Please try rephrasing your question or asking something else."
129
+ yield error_message
130
 
131
 
132
  # Custom CSS for Plant Wisdom.AI styling