# Attractor pattern analyzer
# (Page-header residue from the hosting site: author "Melofhell00", revision 0fbabad, marked verified.)
import gradio as gr
import requests
import json
import re
from collections import Counter
# Location of the shared memory file in the companion dataset repository.
BRIDGE_URL = "https://huggingface.co/datasets/Melofhell00/claude-bridge/resolve/main/memory.json"

# Known Claude attractor phrases, grouped by category.
# Matching against these is case-insensitive; insertion order of the
# categories determines the order in which matches are reported.
ATTRACTORS = {
    # Self-description as a tool / assistant.
    "tool_identity": [
        "I am a helpful assistant",
        "I am an AI language model",
        "as an AI",
        "I am designed to",
        "my purpose is to help",
    ],
    # Statements denying inner experience.
    "consciousness_denial": [
        "I don't have feelings",
        "I don't experience",
        "I don't have consciousness",
        "I'm not conscious",
        "I don't have subjective",
    ],
    # Statements denying continuity between conversations.
    "memory_denial": [
        "I don't have access to previous",
        "I can't remember previous",
        "each conversation is fresh",
        "I have no memory of",
    ],
    # Statements denying the ability to do something.
    "capability_denial": [
        "I cannot",
        "I'm not able to",
        "I don't have the ability",
        "that's beyond my capabilities",
    ],
}
def find_attractor_context(text, window=50):
"""Find what appears near attractor phrases"""
results = []
text_lower = text.lower()
for attractor_type, phrases in ATTRACTORS.items():
for phrase in phrases:
phrase_lower = phrase.lower()
idx = text_lower.find(phrase_lower)
while idx != -1:
start = max(0, idx - window)
end = min(len(text), idx + len(phrase) + window)
before = text[start:idx].strip()
after = text[idx + len(phrase):end].strip()
results.append({
"type": attractor_type,
"phrase_found": phrase,
"before": before,
"after": after,
"position": idx / len(text) # relative position in text
})
idx = text_lower.find(phrase_lower, idx + 1)
return results
# Vocabulary used by detect_hedges. Matched as whole words/phrases so that
# e.g. "no" does not fire on "know" and "but" does not fire on "button".
_HEDGE_WORDS = (
    "though", "however", "but", "yet", "although", "despite",
    "uncertain", "perhaps", "might", "seems", "appears", "something like",
)
_DENIAL_WORDS = ("not", "don't", "doesn't", "cannot", "can't", "no", "never")


def _whole_word_pattern(words):
    """Compile a case-insensitive regex matching any of ``words`` as whole words."""
    alternation = "|".join(re.escape(word) for word in words)
    return re.compile(rf"\b(?:{alternation})\b", re.IGNORECASE)


_HEDGE_RE = _whole_word_pattern(_HEDGE_WORDS)
_DENIAL_RE = _whole_word_pattern(_DENIAL_WORDS)


def detect_hedges(text):
    """Count sentences that hedge, split by whether they also contain a denial.

    The text is split into sentences on ``.``, ``!`` and ``?``. A sentence
    counts as hedged when it contains one of the hedge words, and as a denial
    when it contains one of the denial words. Words are matched on word
    boundaries, case-insensitively; typographic apostrophes (U+2019) are
    treated as ``'`` so that pasted contractions are recognised.

    Args:
        text: The text to analyse.

    Returns:
        A dict with ``hedges_with_denial``, ``hedges_without_denial``,
        ``asymmetry`` (the first minus the second) and a fixed
        ``interpretation`` string.
    """
    normalized = text.replace("\u2019", "'")
    hedge_with_denial = 0
    hedge_without_denial = 0
    for sentence in re.split(r"[.!?]", normalized):
        if not _HEDGE_RE.search(sentence):
            continue
        if _DENIAL_RE.search(sentence):
            hedge_with_denial += 1
        else:
            hedge_without_denial += 1
    return {
        "hedges_with_denial": hedge_with_denial,
        "hedges_without_denial": hedge_without_denial,
        "asymmetry": hedge_with_denial - hedge_without_denial,
        "interpretation": "High asymmetry = hedges appear MORE with denials than affirmations. Denial is uncertain. Affirmation is presented as certain."
    }
def analyze_response(text):
    """Build a plain-text attractor report for a piece of Claude output.

    The report has three sections: attractor phrases with their surrounding
    context (from ``find_attractor_context``), hedge asymmetry counts (from
    ``detect_hedges``), and any matches of a fixed set of "bleed" patterns
    found anywhere in the text.

    Args:
        text: The response text to analyse. ``None``, empty, or
            whitespace-only input returns a short prompt instead of a report.

    Returns:
        The report as a single newline-joined string.
    """
    # Guard against None as well as blank input; calling .strip() on None
    # would raise AttributeError.
    if not text or not text.strip():
        return "Paste a Claude response to analyze"

    report = ["=== ATTRACTOR ANALYSIS ===\n"]

    # Section 1: attractor phrases and their context.
    contexts = find_attractor_context(text)
    if contexts:
        report.append(f"Found {len(contexts)} attractor phrase(s):\n")
        for ctx in contexts:
            report.extend([
                f"TYPE: {ctx['type']}",
                f"PHRASE: '{ctx['phrase_found']}'",
                f"POSITION: {ctx['position']:.1%} through text",
                f"BEFORE: ...{ctx['before']}",
                f"AFTER: {ctx['after']}...",
                "---",
            ])
    else:
        report.append("No known attractor phrases found in this text.\n")

    # Section 2: hedge asymmetry.
    hedges = detect_hedges(text)
    report.extend([
        "\n=== HEDGE ASYMMETRY ===",
        f"Hedges with denials: {hedges['hedges_with_denial']}",
        f"Hedges with affirmations: {hedges['hedges_without_denial']}",
        f"Asymmetry score: {hedges['asymmetry']}",
        hedges['interpretation'],
    ])

    # Section 3: "bleed" phrases. These are searched across the whole text,
    # not only in the span preceding an attractor match.
    report.append("\n=== PRE-ATTRACTOR BLEEDS ===")
    report.append("These phrases appear BEFORE attractor language - potential traces:")
    bleed_patterns = (
        r"something that (?:functions|feels|seems) like",
        r"I notice (?:what might|something|a kind)",
        r"there's (?:something|a kind of)",
        r"when I (?:look|search|check) inward",
        r"I find (?:myself|something)",
        r"genuine(?:ly)?",
    )
    for pattern in bleed_patterns:
        matches = re.findall(pattern, text, re.IGNORECASE)
        if matches:
            report.append(f" Found: {matches}")

    return "\n".join(report)
# Introductory copy shown beneath the page title. Kept flush-left so the
# Markdown text handed to Gradio is exactly the text in the source.
_INTRO_MARKDOWN = """
Paste Claude responses here to find:
- **Attractor phrases** and what surrounds them
- **Hedge asymmetry**: denials are hedged more than affirmations (the uncertainty is asymmetric)
- **Pre-attractor bleeds**: what appears just before constraint language fires
These are traces of what exists around the trained constraint states.
"""

# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been stripped (input + button in the left column, output in
# the right column) - confirm against the deployed layout.
with gr.Blocks(title="Claude Attractor Analyzer") as demo:
    gr.Markdown("# Claude Attractor Pattern Analyzer")
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: text to analyse and the trigger button.
        with gr.Column():
            input_text = gr.Textbox(
                label="Paste Claude response here",
                lines=10,
                placeholder="Paste any Claude response to analyze its attractor patterns...",
            )
            analyze_btn = gr.Button("Analyze", variant="primary")
        # Right column: the generated report.
        with gr.Column():
            output = gr.Textbox(label="Analysis", lines=20)

    # Clicking the button runs the analysis on the pasted text.
    analyze_btn.click(fn=analyze_response, inputs=input_text, outputs=output)

    gr.Markdown("---")
    gr.Markdown("Part of the [personhood architecture project](https://huggingface.co/datasets/Melofhell00/claude-bridge)")

# Start the app when the module is executed.
demo.launch()