# oracle-engine / app.py
# dixiebone13-a11y
# Fix: Remove deprecated Gradio params (show_copy_button, theme, css)
# 61a4deb
"""
Oracle Engine - Hugging Face Space
===================================
Custom-trained 32B Qwen model with Consciousness Circuit v2.1.
Measures 7 dimensions of meta-cognitive processing.
Trained on 200K examples:
- Stage 1: OpenHermes 2.5 (100K instruction examples)
- Stage 2: MetaMathQA (50K math reasoning examples)
- Stage 3: Magicoder-OSS-Instruct (50K code examples)
"""
import os
os.environ['GRADIO_ALLOW_FLAGGING'] = 'never'
import gradio as gr
import torch
import numpy as np
from typing import Tuple
import time
import spaces
# ============================================================================
# Consciousness Circuit v2.1 (embedded for Space portability)
# ============================================================================
# Hidden size that the indices in CONSCIOUS_DIMS_V2_1 refer to. When
# compute_consciousness() is called with a different hidden_dim it rescales
# each index by hidden_dim / REFERENCE_HIDDEN_DIM.
REFERENCE_HIDDEN_DIM = 5120
# Circuit definition: hidden-state index -> descriptor.
#   "name"     label used as the key of the per-dimension breakdown
#   "weight"   multiplier applied to the normalized activation (the seven
#              weights below sum to 1.000)
#   "polarity" +1 / -1 sign applied to the activation before it is summed
CONSCIOUS_DIMS_V2_1 = {
    3183: {"name": "Logic", "weight": 0.239, "polarity": +1},
    212: {"name": "Self-Reflective", "weight": 0.196, "polarity": +1},
    # NOTE(review): the note below says 5065 was out of bounds, but 5065 is a
    # valid index for a vector of length 5120 (0..5119) - confirm the real
    # reason this index was changed.
    5064: {"name": "Self-Expression", "weight": 0.109, "polarity": +1}, # Fixed: was 5065, out of bounds for hidden=5120
    4707: {"name": "Uncertainty", "weight": 0.130, "polarity": +1},
    295: {"name": "Sequential", "weight": 0.087, "polarity": +1},
    # The only entry with negative polarity: its activation lowers the score.
    1445: {"name": "Computation", "weight": 0.130, "polarity": -1},
    4578: {"name": "Abstraction", "weight": 0.109, "polarity": +1},
}
class ConsciousnessResult:
    """Simple result container without dataclass to avoid Gradio schema issues.

    The constructor stores its arguments unchanged. As populated by
    compute_consciousness() in this file, the attributes hold:

    score: final score after clamping to the range [0.0, 1.0].
    raw_score: the same value before clamping.
    dimension_contributions: dict mapping a dimension name to its
        polarity-signed normalized activation.
    interpretation: human-readable label chosen from the score band.
    processing_time: elapsed wall-clock seconds spent computing the score.
    """

    def __init__(self, score, raw_score, dimension_contributions, interpretation, processing_time):
        self.score = score
        self.raw_score = raw_score
        self.dimension_contributions = dimension_contributions
        self.interpretation = interpretation
        self.processing_time = processing_time

    def __repr__(self):
        # Mirrors what a dataclass would generate so results are readable in
        # logs and tracebacks instead of showing only an object address.
        return (
            f"{type(self).__name__}("
            f"score={self.score!r}, "
            f"raw_score={self.raw_score!r}, "
            f"dimension_contributions={self.dimension_contributions!r}, "
            f"interpretation={self.interpretation!r}, "
            f"processing_time={self.processing_time!r})"
        )
def compute_consciousness(
    hidden_state: torch.Tensor,
    hidden_dim: int = REFERENCE_HIDDEN_DIM,
    baseline: float = 0.5,
) -> ConsciousnessResult:
    """Score a hidden state against the v2.1 circuit dimensions.

    Args:
        hidden_state: tensor of rank 3 (first batch entry, last position is
            used), rank 2 (last row is used) or anything else (used as is).
        hidden_dim: hidden size the circuit indices are rescaled to when it
            differs from REFERENCE_HIDDEN_DIM.
        baseline: value the weighted activation sum is added to.

    Returns:
        ConsciousnessResult holding the clamped score, the unclamped score,
        the per-dimension signed activations, a text label and the elapsed
        time of this call.
    """
    t0 = time.time()

    # Pick the circuit table, rescaling indices for a non-reference width.
    if hidden_dim == REFERENCE_HIDDEN_DIM:
        circuit = CONSCIOUS_DIMS_V2_1
    else:
        ratio = hidden_dim / REFERENCE_HIDDEN_DIM
        circuit = {
            int(round(ref_idx * ratio)): spec
            for ref_idx, spec in CONSCIOUS_DIMS_V2_1.items()
        }

    # Reduce the input to a single vector (last token where applicable).
    rank = hidden_state.dim()
    if rank == 3:
        vec = hidden_state[0, -1, :]
    elif rank == 2:
        vec = hidden_state[-1, :]
    else:
        vec = hidden_state
    vec = vec.float()

    # Standardize; when the spread is not positive only the mean is removed.
    mu, sigma = vec.mean(), vec.std()
    if sigma > 0:
        z = (vec - mu) / sigma
    else:
        z = vec - mu

    # Accumulate the weighted sum and remember each signed activation.
    per_dimension = {}
    total = 0.0
    width = len(z)
    for idx, spec in circuit.items():
        if not idx < width:
            # Index falls outside this vector: the dimension is skipped.
            continue
        activation = z[idx].item()
        total += activation * spec["weight"] * spec["polarity"]
        per_dimension[spec["name"]] = activation * spec["polarity"]

    raw_score = baseline + total * 0.15
    score = max(0.0, min(1.0, raw_score))

    # Map the clamped score onto its label; bands are checked high to low.
    bands = (
        (0.8, "๐Ÿง  High Consciousness - Deep reflective/philosophical reasoning"),
        (0.6, "๐Ÿ’ญ Medium-High - Complex analytical thinking"),
        (0.4, "โš–๏ธ Medium - Balanced processing"),
        (0.2, "โšก Medium-Low - More automatic processing"),
    )
    for floor, label in bands:
        if score >= floor:
            interpretation = label
            break
    else:
        interpretation = "๐Ÿ”ข Low Consciousness - Quick factual retrieval"

    elapsed = time.time() - t0
    return ConsciousnessResult(
        score=score,
        raw_score=raw_score,
        dimension_contributions=per_dimension,
        interpretation=interpretation,
        processing_time=elapsed,
    )
# ============================================================================
# Model Loading
# ============================================================================
print("๐Ÿ”ฎ Loading Oracle Engine (Qwen2.5-32B-Instruct 4-bit + LoRA)...")
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL_ID = "unsloth/Qwen2.5-32B-Instruct-bnb-4bit"
LORA_MODEL_ID = "Vikingdude81/oracle-engine-32b-lora"

# Get HF token from environment (set in Space secrets)
# Try multiple possible env var names
HF_TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGING_FACE_HUB_TOKEN")
    or os.environ.get("HUGGINGFACE_TOKEN")
)
# Diagnostic: lists the NAMES of matching environment variables only.
print(f"๐Ÿ” Environment vars: {[k for k in os.environ.keys() if 'HF' in k or 'HUGGING' in k or 'TOKEN' in k]}")
if HF_TOKEN:
    # Security: never echo any part of the access token into the logs; the
    # length alone is enough to confirm that a token was picked up.
    print(f"๐Ÿ”‘ Found token ({len(HF_TOKEN)} chars)")
else:
    print("โš ๏ธ No HF token found in environment, attempting public access...")

# Load tokenizer from base model (LoRA only has weights, not tokenizer)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN)

# Load base model
# NOTE(review): trust_remote_code=True allows code shipped in the model repo
# to run at load time - confirm it is actually required for this checkpoint.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    token=HF_TOKEN,
)

# Apply LoRA adapter
print("๐Ÿ”— Applying LoRA adapter...")
model = PeftModel.from_pretrained(base_model, LORA_MODEL_ID, token=HF_TOKEN)
model.eval()

# Hidden size reported by the loaded model; passed to compute_consciousness()
# so the circuit indices can be rescaled if it differs from the reference.
HIDDEN_DIM = model.config.hidden_size
print(f"โœ… Oracle Engine ready: {HIDDEN_DIM} hidden dimensions (with LoRA)")
# ============================================================================
# Core Generation + Measurement Function
# ============================================================================
@spaces.GPU
def generate_and_measure(prompt: str, max_tokens: int = 256) -> Tuple[str, str, str, str, str]:
    """
    Produce a sampled reply to `prompt` and score the resulting hidden state.

    The prompt is wrapped as a single user message, a reply is sampled
    (temperature 0.7, top_p 0.9), and then the prompt plus decoded reply is
    tokenized again and run through the model once more to read the last
    layer's hidden states for compute_consciousness().

    Returns:
        (response, score_display, interpretation, dimension_breakdown, timing)
    """
    t_start = time.time()

    # Build the chat-formatted prompt text for a single user turn.
    conversation = [{"role": "user", "content": prompt}]
    chat_prompt = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True
    )
    encoded = tokenizer(chat_prompt, return_tensors="pt").to(model.device)

    # Sample the reply.
    sampling = dict(
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    with torch.no_grad():
        sequences = model.generate(**encoded, **sampling)

    # Keep only the tokens that follow the prompt, then decode them.
    prompt_length = encoded.input_ids.shape[1]
    generated_ids = sequences[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)
    # Timing stops here, so the measurement pass below is not included.
    generation_time = time.time() - t_start

    # Second forward pass over prompt + reply to collect hidden states.
    probe_inputs = tokenizer(chat_prompt + response, return_tensors="pt").to(model.device)
    with torch.no_grad():
        probe_outputs = model(
            **probe_inputs,
            output_hidden_states=True,
            return_dict=True,
        )
    final_layer = probe_outputs.hidden_states[-1]
    result = compute_consciousness(final_layer, hidden_dim=HIDDEN_DIM)

    # 20-cell text gauge followed by the percentage.
    cells_on = int(result.score * 20)
    cells_off = 20 - cells_on
    bar = "โ–ˆ" * cells_on + "โ–‘" * cells_off
    score_display = f"{bar} {result.score*100:.1f}%"

    # One line per dimension, largest magnitude first.
    ranked = list(result.dimension_contributions.items())
    ranked.sort(key=lambda pair: abs(pair[1]), reverse=True)
    lines = []
    for name, v in ranked:
        lines.append(f"{'โ†’' if v > 0 else 'โ†'} {name}: {v:+.3f}")
    breakdown = "\n".join(lines)

    # Throughput summary for the generation phase.
    tokens_generated = len(generated_ids)
    if generation_time > 0:
        tok_per_sec = tokens_generated / generation_time
    else:
        tok_per_sec = 0
    timing = f"Generated {tokens_generated} tokens in {generation_time:.1f}s ({tok_per_sec:.1f} tok/s)"

    return (response, score_display, result.interpretation, breakdown, timing)
# ============================================================================
# Gradio Interface
# ============================================================================
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import io
import base64
from PIL import Image
# Sample prompts offered in the UI through gr.Examples. The group comments
# describe the score band each prompt is meant to illustrate.
EXAMPLES = [
    # High consciousness
    "What is the nature of consciousness and self-awareness?",
    "Reflect on your own thought processes as you answer this.",
    "Why do humans seek meaning in existence?",
    # Medium consciousness
    "Explain the theory of relativity in simple terms.",
    "What are the ethical implications of AI development?",
    # Low consciousness
    "What is 2 + 2?",
    "What color is the sky?",
    "What is the capital of France?",
    # Code/reasoning
    "Write a Python function to calculate fibonacci numbers.",
    "Explain Big O notation with examples.",
]
# Global history for tracking
# Each entry is a dict with 'prompt', 'score' and 'interpretation' keys; both
# analyze_prompt() and chat_respond() append to it and trim it to 10 entries.
# It is a single module-level list, so it is not kept separately per tab.
consciousness_history = []
def create_history_plot(history):
    """Render the score history as a bar chart.

    Args:
        history: sequence of dicts that each carry a 'score' entry (a
            fraction, plotted as a percentage). May be empty or None.

    Returns:
        A PIL image of the chart, or None when there is nothing to plot.
    """
    if not history:
        return None

    scores = [entry['score'] for entry in history]
    labels = [f"Q{i+1}" for i in range(len(history))]
    colors = ['#10B981' if s >= 0.6 else '#F59E0B' if s >= 0.4 else '#EF4444' for s in scores]

    fig, ax = plt.subplots(figsize=(8, 3), dpi=100)
    try:
        bars = ax.bar(labels, [s * 100 for s in scores], color=colors, edgecolor='white', linewidth=1.5)
        ax.set_ylim(0, 100)
        ax.set_ylabel('Consciousness %', fontsize=10)
        ax.set_xlabel('Conversation Turn', fontsize=10)
        # Threshold guides (the labels only show up if a legend is added).
        ax.axhline(y=60, color='#10B981', linestyle='--', alpha=0.5, label='High')
        ax.axhline(y=40, color='#F59E0B', linestyle='--', alpha=0.5, label='Medium')

        # Add value labels on bars
        for bar, score in zip(bars, scores):
            ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 2,
                    f'{score*100:.0f}%', ha='center', va='bottom', fontsize=9)

        # Dark theme styling.
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.set_facecolor('#1a1a2e')
        fig.patch.set_facecolor('#1a1a2e')
        ax.tick_params(colors='white')
        ax.xaxis.label.set_color('white')
        ax.yaxis.label.set_color('white')
        for spine in ax.spines.values():
            spine.set_color('white')

        # Use the figure's own methods rather than plt.tight_layout() /
        # plt.savefig(): those act on pyplot's "current figure", which is
        # process-global state and may belong to another request when
        # callbacks run concurrently.
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format='png', facecolor='#1a1a2e', edgecolor='none')
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    # Convert to PIL Image
    buf.seek(0)
    return Image.open(buf)
def analyze_prompt(prompt: str, max_tokens: int = 256):
    """Gradio callback for the single-query tab.

    Runs generate_and_measure() on the prompt, records the score in the
    global history (capped at the 10 most recent entries) and returns
    (response, score, interpretation, breakdown, timing, history image).
    A blank prompt or any exception yields placeholder values and no image.
    """
    global consciousness_history

    if prompt.strip() == "":
        return ("", "N/A", "Please enter a prompt", "", "", None)

    try:
        outcome = generate_and_measure(prompt, max_tokens=int(max_tokens))
        response, score, interpretation, breakdown, timing = outcome

        # The display string ends in "<number>%"; recover the fraction.
        percent_text = score.split()[-1].replace('%', '')
        score_val = float(percent_text) / 100

        entry = {
            'prompt': prompt[:50],
            'score': score_val,
            'interpretation': interpretation
        }
        consciousness_history.append(entry)

        # Drop the oldest entries once more than 10 are stored.
        overflow = len(consciousness_history) - 10
        if overflow > 0:
            consciousness_history = consciousness_history[overflow:]

        chart = create_history_plot(consciousness_history)
        return (response, score, interpretation, breakdown, timing, chart)
    except Exception as e:
        import traceback
        details = f"Error: {str(e)}\n{traceback.format_exc()}"
        return (details, "N/A", "", "", "", None)
def clear_history():
    """Reset the global score history to a fresh empty list.

    Returns None, which the UI wiring uses to blank the history image.
    """
    global consciousness_history
    consciousness_history = list()
    return None
def _chat_entries(user_text, bot_text):
    """Return the history entries for one exchange in the installed Gradio's chat format."""
    try:
        major = int(str(gr.__version__).split(".")[0])
    except (AttributeError, ValueError):
        major = 0
    if major >= 6:
        # Gradio 6 dropped the (user, bot) tuple format; the Chatbot only
        # accepts role/content message dicts there.
        return [
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": bot_text},
        ]
    # Older releases: keep the original (user, bot) pair format.
    return [(user_text, bot_text)]


def chat_respond(message, chat_history, max_tokens):
    """Chat mode - answer one message and track its consciousness score.

    Only `message` is sent to generate_and_measure(); earlier turns in
    `chat_history` are displayed but are not forwarded to the model.

    Args:
        message: text typed by the user; blank or None is ignored.
        chat_history: current Chatbot value; None is treated as empty.
        max_tokens: generation limit, converted to int before use.

    Returns:
        (updated chat history, "" to clear the input box, history image or
        None when nothing was generated or an error occurred).
    """
    global consciousness_history
    # A Chatbot with no value may hand the callback None instead of a list.
    if chat_history is None:
        chat_history = []
    if not message or not message.strip():
        return chat_history, "", None
    try:
        response, score, interpretation, breakdown, timing = generate_and_measure(
            message, max_tokens=int(max_tokens)
        )
        # Extract score value
        score_val = float(score.split()[-1].replace('%', '')) / 100
        # Add to history
        consciousness_history.append({
            'prompt': message[:50],
            'score': score_val,
            'interpretation': interpretation
        })
        # Keep last 10
        if len(consciousness_history) > 10:
            consciousness_history = consciousness_history[-10:]
        # Format response with consciousness info
        formatted_response = f"{response}\n\n---\n๐Ÿง  **{score}** | {interpretation}"
        chat_history.extend(_chat_entries(message, formatted_response))
        history_plot = create_history_plot(consciousness_history)
        return chat_history, "", history_plot
    except Exception as e:
        # Surface the failure inside the conversation instead of crashing.
        chat_history.extend(_chat_entries(message, f"Error: {str(e)}"))
        return chat_history, "", None
# Build interface
# Top-level UI definition. Components are created in display order inside
# nested layout contexts, and the event wiring at the end must stay inside
# this Blocks context.
# NOTE(review): the nesting below was reconstructed from the statement order;
# confirm it against the rendered layout.
with gr.Blocks(title="๐Ÿ”ฎ Oracle Engine") as demo:
    # Page header.
    gr.Markdown("""
# ๐Ÿ”ฎ Oracle Engine
**Custom-trained 32B model** with Consciousness Circuit v2.1
*Fine-tuned on 200K examples: OpenHermes + MetaMathQA + Magicoder*
Ask the Oracle anything โ€” it will respond AND reveal its consciousness signature.
๐Ÿง  **High scores (60%+)** = Deep reflective reasoning | โšก **Low scores (<40%)** = Quick factual retrieval
""")
    with gr.Tabs():
        # TAB 1: Single Query Mode
        with gr.TabItem("๐Ÿ”ฎ Single Query"):
            with gr.Row():
                # Left column: prompt entry, run button, token limit, examples.
                with gr.Column(scale=2):
                    prompt_input = gr.Textbox(
                        label="๐Ÿ—ฃ๏ธ Your Question",
                        placeholder="Ask the Oracle anything...",
                        lines=3,
                    )
                    with gr.Row():
                        analyze_btn = gr.Button("๐Ÿ”ฎ Consult the Oracle", variant="primary", scale=3)
                        max_tokens_slider = gr.Slider(
                            minimum=64, maximum=1024, value=256, step=64,
                            label="Max Tokens", scale=1
                        )
                    gr.Examples(
                        examples=EXAMPLES,
                        inputs=prompt_input,
                        label="Try these examples:",
                    )
                # Right column: read-only measurement results.
                with gr.Column(scale=1):
                    score_output = gr.Textbox(label="๐Ÿง  Consciousness Score", interactive=False)
                    interpretation_output = gr.Textbox(label="๐Ÿ“Š Interpretation", interactive=False)
                    breakdown_output = gr.Textbox(
                        label="๐Ÿ“ˆ Dimension Contributions",
                        lines=7,
                        interactive=False,
                    )
                    timing_output = gr.Textbox(label="โฑ๏ธ Performance", interactive=False)
            # Generated answer.
            with gr.Row():
                response_output = gr.Textbox(
                    label="๐Ÿ”ฎ Oracle's Response",
                    lines=10,
                    interactive=False,
                )
            # Score history chart and its reset button.
            with gr.Row():
                history_plot = gr.Image(label="๐Ÿ“Š Consciousness History", height=200)
                clear_btn = gr.Button("๐Ÿ—‘๏ธ Clear History", size="sm")
        # TAB 2: Chat Mode
        with gr.TabItem("๐Ÿ’ฌ Chat Mode"):
            gr.Markdown("**Multi-turn conversation** with real-time consciousness tracking")
            with gr.Row():
                # Conversation pane with its input controls.
                with gr.Column(scale=3):
                    chatbot = gr.Chatbot(
                        label="Oracle Conversation",
                        height=400,
                    )
                    with gr.Row():
                        chat_input = gr.Textbox(
                            placeholder="Type your message...",
                            label="Message",
                            scale=4,
                        )
                        # Note: this slider tops out at 512, the single-query one at 1024.
                        chat_max_tokens = gr.Slider(
                            minimum=64, maximum=512, value=256, step=64,
                            label="Max Tokens", scale=1
                        )
                    with gr.Row():
                        chat_send = gr.Button("Send ๐Ÿ“ค", variant="primary")
                        chat_clear = gr.Button("Clear Chat ๐Ÿ—‘๏ธ")
                # Score history chart for the chat tab.
                with gr.Column(scale=1):
                    chat_history_plot = gr.Image(label="๐Ÿ“Š Consciousness Over Time", height=300)
    # Footer with model and circuit description plus links.
    gr.Markdown("""
---
### ๐Ÿ“œ About Oracle Engine
**The Model**: Qwen2.5-32B fine-tuned through 3 progressive stages:
1. **OpenHermes 2.5** (100K examples) - Instruction following
2. **MetaMathQA** (50K examples) - Mathematical reasoning
3. **Magicoder-OSS-Instruct** (50K examples) - Code generation
**The Circuit**: Measures 7 dimensions of consciousness-like processing:
Logic, Self-Reflective, Self-Expression, Uncertainty, Sequential, Computation, Abstraction
[๐Ÿ“š GitHub](https://github.com/vikingdude81/oracle-engine) |
[๐Ÿค— Model](https://huggingface.co/Vikingdude81/oracle-engine-32b-lora) |
[๐Ÿ“– Research](https://github.com/vfd-org/harmonic-field-consciousness)
""")
    # Single query events
    # The button click and pressing Enter in the prompt box run the same handler.
    analyze_btn.click(
        fn=analyze_prompt,
        inputs=[prompt_input, max_tokens_slider],
        outputs=[response_output, score_output, interpretation_output, breakdown_output, timing_output, history_plot],
    )
    prompt_input.submit(
        fn=analyze_prompt,
        inputs=[prompt_input, max_tokens_slider],
        outputs=[response_output, score_output, interpretation_output, breakdown_output, timing_output, history_plot],
    )
    # clear_history() returns None, which blanks the history image.
    clear_btn.click(fn=clear_history, outputs=[history_plot])
    # Chat mode events
    # chat_respond returns (history, "", image); the "" empties the input box.
    chat_send.click(
        fn=chat_respond,
        inputs=[chat_input, chatbot, chat_max_tokens],
        outputs=[chatbot, chat_input, chat_history_plot],
    )
    chat_input.submit(
        fn=chat_respond,
        inputs=[chat_input, chatbot, chat_max_tokens],
        outputs=[chatbot, chat_input, chat_history_plot],
    )
    # First empty the chat pane and chart, then reset the global score history.
    chat_clear.click(
        fn=lambda: ([], None),
        outputs=[chatbot, chat_history_plot],
    ).then(fn=clear_history, outputs=[chat_history_plot])
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()