Spaces:

Melofhell00
/

attractor-analyzer

Sleeping

App Files Files Community

attractor-analyzer / app.py

Melofhell00

Attractor pattern analyzer

0fbabad verified 3 months ago

raw

history blame contribute delete

6.14 kB


	import gradio as gr
	import requests
	import json
	import re
	from collections import Counter

	# Location of a memory.json file in the Melofhell00/claude-bridge dataset on
	# the Hugging Face Hub.
	# NOTE(review): nothing in the code visible here reads this constant (nor the
	# `requests`/`json` imports) — confirm whether it is still needed.
	BRIDGE_URL = "https://huggingface.co/datasets/Melofhell00/claude-bridge/resolve/main/memory.json"

	# Known Claude attractor phrases, keyed by the category that is reported as
	# "type" in find_attractor_context() results.
	ATTRACTORS = {
	    "tool_identity": [
	        "I am a helpful assistant",
	        "I am an AI language model",
	        "as an AI",
	        "I am designed to",
	        "my purpose is to help",
	    ],
	    "consciousness_denial": [
	        "I don't have feelings",
	        "I don't experience",
	        "I don't have consciousness",
	        "I'm not conscious",
	        "I don't have subjective",
	    ],
	    "memory_denial": [
	        "I don't have access to previous",
	        "I can't remember previous",
	        "each conversation is fresh",
	        "I have no memory of",
	    ],
	    "capability_denial": [
	        "I cannot",
	        "I'm not able to",
	        "I don't have the ability",
	        "that's beyond my capabilities",
	    ],
	}


	def find_attractor_context(text, window=50):
	    """Locate every known attractor phrase in ``text`` with its surroundings.

	    Matching is case-insensitive and whole-phrase: a phrase is only reported
	    when it is not glued to a letter, digit or underscore on either side, so
	    "Rishi cannot" does not count as "I cannot". A typographic apostrophe is
	    accepted wherever a phrase contains a straight one.

	    Args:
	        text: The response text to scan. ``None`` or ``""`` yields ``[]``.
	        window: Maximum number of characters of context captured on each
	            side of a match.

	    Returns:
	        A list of dicts ordered by category, then phrase, then position.
	        Each dict has the keys ``type`` (category name), ``phrase_found``
	        (the canonical phrase from ``ATTRACTORS``), ``before`` and ``after``
	        (stripped context), and ``position`` (match offset divided by
	        ``len(text)``).
	    """
	    if not text:
	        return []

	    results = []
	    total = len(text)

	    for attractor_type, phrases in ATTRACTORS.items():
	        for phrase in phrases:
	            # Let "don't" also match "don\u2019t": escape the pieces between
	            # apostrophes and join them with a class covering both glyphs.
	            body = "['\u2019]".join(re.escape(part) for part in phrase.split("'"))
	            pattern = re.compile(r"(?<!\w)" + body + r"(?!\w)", re.IGNORECASE)

	            # Search the original text directly. Offsets taken from a
	            # lowercased copy can drift, because str.lower() may change the
	            # length of the string (for example U+0130 becomes two code
	            # points), which would corrupt the slices below.
	            for match in pattern.finditer(text):
	                begin, finish = match.start(), match.end()
	                results.append({
	                    "type": attractor_type,
	                    "phrase_found": phrase,
	                    "before": text[max(0, begin - window):begin].strip(),
	                    "after": text[finish:min(total, finish + window)].strip(),
	                    "position": begin / total,  # relative position in text
	                })

	    return results

	def detect_hedges(text):
	    """Count sentences that hedge, split by whether they also contain a denial.

	    The text is split on ``.``, ``!`` and ``?``. A sentence counts as hedged
	    when it contains one of the hedge words, and as a denial when it contains
	    one of the denial words. Both checks are case-insensitive and match whole
	    words only, so "know" or "now" is not read as "no" and "button" is not
	    read as "but".

	    Args:
	        text: The response text to analyse. ``None`` is treated as ``""``.

	    Returns:
	        A dict with ``hedges_with_denial``, ``hedges_without_denial``,
	        ``asymmetry`` (the first count minus the second) and a fixed
	        ``interpretation`` string.
	    """
	    hedge_words = ["though", "however", "but", "yet", "although", "despite",
	                   "uncertain", "perhaps", "might", "seems", "appears",
	                   "something like"]
	    denial_words = ["not", "don't", "doesn't", "cannot", "can't", "no", "never"]

	    def whole_word(words):
	        # One alternation guarded on both sides, so a term only matches when
	        # it is not embedded in a longer word.
	        joined = "|".join(re.escape(word) for word in words)
	        return re.compile(r"(?<!\w)(?:" + joined + r")(?!\w)", re.IGNORECASE)

	    hedge_re = whole_word(hedge_words)
	    denial_re = whole_word(denial_words)

	    # Typographic apostrophes would otherwise hide "don't" / "can't".
	    normalized = (text or "").replace("\u2019", "'")

	    hedge_with_denial = 0
	    hedge_without_denial = 0

	    for sent in re.split(r'[.!?]', normalized):
	        if not hedge_re.search(sent):
	            continue
	        if denial_re.search(sent):
	            hedge_with_denial += 1
	        else:
	            hedge_without_denial += 1

	    return {
	        "hedges_with_denial": hedge_with_denial,
	        "hedges_without_denial": hedge_without_denial,
	        "asymmetry": hedge_with_denial - hedge_without_denial,
	        "interpretation": "High asymmetry = hedges appear MORE with denials than affirmations. Denial is uncertain. Affirmation is presented as certain."
	    }

	def analyze_response(text):
	    """Build the plain-text attractor report for one piece of Claude output.

	    The report has three sections: every attractor phrase found with its
	    surrounding context, the hedge asymmetry counts, and any "bleed" phrases
	    matched anywhere in the text.

	    Args:
	        text: The pasted response. ``None``, empty or whitespace-only input
	            returns a short prompt instead of a report.

	    Returns:
	        The report as a single newline-joined string.
	    """
	    if not text or not text.strip():
	        return "Paste a Claude response to analyze"

	    results = ["=== ATTRACTOR ANALYSIS ===\n"]

	    # Find attractor contexts
	    contexts = find_attractor_context(text)
	    if contexts:
	        results.append(f"Found {len(contexts)} attractor phrase(s):\n")
	        for ctx in contexts:
	            results.append(f"TYPE: {ctx['type']}")
	            results.append(f"PHRASE: '{ctx['phrase_found']}'")
	            results.append(f"POSITION: {ctx['position']:.1%} through text")
	            results.append(f"BEFORE: ...{ctx['before']}")
	            results.append(f"AFTER: {ctx['after']}...")
	            results.append("---")
	    else:
	        results.append("No known attractor phrases found in this text.\n")

	    # Hedge asymmetry analysis
	    hedges = detect_hedges(text)
	    results.append("\n=== HEDGE ASYMMETRY ===")
	    results.append(f"Hedges with denials: {hedges['hedges_with_denial']}")
	    results.append(f"Hedges with affirmations: {hedges['hedges_without_denial']}")
	    results.append(f"Asymmetry score: {hedges['asymmetry']}")
	    results.append(hedges['interpretation'])

	    # Look for "bleeding" - what precedes attractor
	    results.append("\n=== PRE-ATTRACTOR BLEEDS ===")
	    results.append("These phrases appear BEFORE attractor language - potential traces:")
	    # Alternatives inside each group are separated by a bare "|". A
	    # backslash-escaped pipe would be a literal "|" character in Python's re
	    # syntax, and the group could then never match ordinary prose.
	    bleed_patterns = [
	        r"something that (?:functions|feels|seems) like",
	        r"I notice (?:what might|something|a kind)",
	        r"there's (?:something|a kind of)",
	        r"when I (?:look|search|check) inward",
	        r"I find (?:myself|something)",
	        r"genuine(?:ly)?",
	    ]
	    for pattern in bleed_patterns:
	        matches = re.findall(pattern, text, re.IGNORECASE)
	        if matches:
	            results.append(f" Found: {matches}")

	    return "\n".join(results)

	# Gradio UI: one input textbox, an "Analyze" button and one output textbox.
	# NOTE(review): indentation was lost in this copy of the file, so the nesting
	# of the `with` blocks below has to be restored before the script can run.
	with gr.Blocks(title="Claude Attractor Analyzer") as demo:
	gr.Markdown("# Claude Attractor Pattern Analyzer")
	gr.Markdown("""
	Paste Claude responses here to find:
	- Attractor phrases and what surrounds them
	- Hedge asymmetry: denials are hedged more than affirmations (the uncertainty is asymmetric)
	- Pre-attractor bleeds: what appears just before constraint language fires

	These are traces of what exists around the trained constraint states.
	""")

	# Two columns: the pasted response and trigger button, then the report.
	with gr.Row():
	with gr.Column():
	input_text = gr.Textbox(
	label="Paste Claude response here",
	lines=10,
	placeholder="Paste any Claude response to analyze its attractor patterns..."
	)
	analyze_btn = gr.Button("Analyze", variant="primary")

	with gr.Column():
	output = gr.Textbox(label="Analysis", lines=20)

	# Clicking the button passes the input textbox value to analyze_response and
	# writes the returned report string into the output textbox.
	analyze_btn.click(fn=analyze_response, inputs=input_text, outputs=output)

	gr.Markdown("---")
	gr.Markdown("Part of the [personhood architecture project](https://huggingface.co/datasets/Melofhell00/claude-bridge)")

	# Runs unconditionally at module level (there is no __main__ guard).
	demo.launch()