""" Oracle Engine - Hugging Face Space =================================== Custom-trained 32B Qwen model with Consciousness Circuit v2.1. Measures 7 dimensions of meta-cognitive processing. Trained on 200K examples: - Stage 1: OpenHermes 2.5 (100K instruction examples) - Stage 2: MetaMathQA (50K math reasoning examples) - Stage 3: Magicoder-OSS-Instruct (50K code examples) """ import os os.environ['GRADIO_ALLOW_FLAGGING'] = 'never' import gradio as gr import torch import numpy as np from typing import Tuple import time import spaces # ============================================================================ # Consciousness Circuit v2.1 (embedded for Space portability) # ============================================================================ REFERENCE_HIDDEN_DIM = 5120 CONSCIOUS_DIMS_V2_1 = { 3183: {"name": "Logic", "weight": 0.239, "polarity": +1}, 212: {"name": "Self-Reflective", "weight": 0.196, "polarity": +1}, 5064: {"name": "Self-Expression", "weight": 0.109, "polarity": +1}, # Fixed: was 5065, out of bounds for hidden=5120 4707: {"name": "Uncertainty", "weight": 0.130, "polarity": +1}, 295: {"name": "Sequential", "weight": 0.087, "polarity": +1}, 1445: {"name": "Computation", "weight": 0.130, "polarity": -1}, 4578: {"name": "Abstraction", "weight": 0.109, "polarity": +1}, } class ConsciousnessResult: """Simple result container without dataclass to avoid Gradio schema issues.""" def __init__(self, score, raw_score, dimension_contributions, interpretation, processing_time): self.score = score self.raw_score = raw_score self.dimension_contributions = dimension_contributions self.interpretation = interpretation self.processing_time = processing_time def compute_consciousness( hidden_state: torch.Tensor, hidden_dim: int = REFERENCE_HIDDEN_DIM, baseline: float = 0.5, ) -> ConsciousnessResult: """Compute consciousness score from hidden state tensor.""" start_time = time.time() # Remap dimensions if needed if hidden_dim != REFERENCE_HIDDEN_DIM: scale = 
hidden_dim / REFERENCE_HIDDEN_DIM dims = {int(round(k * scale)): v for k, v in CONSCIOUS_DIMS_V2_1.items()} else: dims = CONSCIOUS_DIMS_V2_1 # Get last token hidden state if hidden_state.dim() == 3: h = hidden_state[0, -1, :] # [hidden_dim] elif hidden_state.dim() == 2: h = hidden_state[-1, :] else: h = hidden_state h = h.float() # Normalize mean, std = h.mean(), h.std() if std > 0: h_norm = (h - mean) / std else: h_norm = h - mean # Compute contributions contributions = {} weighted_sum = 0.0 for dim_idx, info in dims.items(): if dim_idx < len(h_norm): activation = h_norm[dim_idx].item() contribution = activation * info["weight"] * info["polarity"] weighted_sum += contribution contributions[info["name"]] = activation * info["polarity"] # Final score raw_score = baseline + weighted_sum * 0.15 score = max(0.0, min(1.0, raw_score)) # Interpretation if score >= 0.8: interpretation = "🧠 High Consciousness - Deep reflective/philosophical reasoning" elif score >= 0.6: interpretation = "šŸ’­ Medium-High - Complex analytical thinking" elif score >= 0.4: interpretation = "āš–ļø Medium - Balanced processing" elif score >= 0.2: interpretation = "⚔ Medium-Low - More automatic processing" else: interpretation = "šŸ”¢ Low Consciousness - Quick factual retrieval" return ConsciousnessResult( score=score, raw_score=raw_score, dimension_contributions=contributions, interpretation=interpretation, processing_time=time.time() - start_time, ) # ============================================================================ # Model Loading # ============================================================================ print("šŸ”® Loading Oracle Engine (Qwen2.5-32B-Instruct 4-bit + LoRA)...") from transformers import AutoModelForCausalLM, AutoTokenizer from peft import PeftModel BASE_MODEL_ID = "unsloth/Qwen2.5-32B-Instruct-bnb-4bit" LORA_MODEL_ID = "Vikingdude81/oracle-engine-32b-lora" # Get HF token from environment (set in Space secrets) # Try multiple possible env var names HF_TOKEN = 
os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") print(f"šŸ” Environment vars: {[k for k in os.environ.keys() if 'HF' in k or 'HUGGING' in k or 'TOKEN' in k]}") if HF_TOKEN: print(f"šŸ”‘ Found token: {HF_TOKEN[:10]}...{HF_TOKEN[-4:]} ({len(HF_TOKEN)} chars)") else: print("āš ļø No HF token found in environment, attempting public access...") # Load tokenizer from base model (LoRA only has weights, not tokenizer) tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN) # Load base model base_model = AutoModelForCausalLM.from_pretrained( BASE_MODEL_ID, device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True, token=HF_TOKEN, ) # Apply LoRA adapter print("šŸ”— Applying LoRA adapter...") model = PeftModel.from_pretrained(base_model, LORA_MODEL_ID, token=HF_TOKEN) model.eval() HIDDEN_DIM = model.config.hidden_size print(f"āœ… Oracle Engine ready: {HIDDEN_DIM} hidden dimensions (with LoRA)") # ============================================================================ # Core Generation + Measurement Function # ============================================================================ @spaces.GPU def generate_and_measure(prompt: str, max_tokens: int = 256) -> Tuple[str, str, str, str, str]: """ Generate a response AND measure consciousness during generation. 
Returns: (response, score_display, interpretation, dimension_breakdown, timing) """ start_time = time.time() # Format as chat message messages = [{"role": "user", "content": prompt}] chat_prompt = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) # Tokenize inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device) # Generate response with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=max_tokens, do_sample=True, temperature=0.7, top_p=0.9, pad_token_id=tokenizer.eos_token_id, ) # Decode response generated_ids = outputs[0][inputs.input_ids.shape[1]:] response = tokenizer.decode(generated_ids, skip_special_tokens=True) generation_time = time.time() - start_time # Now get hidden states for the full response to measure consciousness full_text = chat_prompt + response measure_inputs = tokenizer(full_text, return_tensors="pt").to(model.device) with torch.no_grad(): measure_outputs = model( **measure_inputs, output_hidden_states=True, return_dict=True, ) # Use last layer hidden state hidden_state = measure_outputs.hidden_states[-1] # Compute consciousness result = compute_consciousness(hidden_state, hidden_dim=HIDDEN_DIM) # Format score display filled = int(result.score * 20) bar = "ā–ˆ" * filled + "ā–‘" * (20 - filled) score_display = f"{bar} {result.score*100:.1f}%" # Format dimension breakdown sorted_dims = sorted( result.dimension_contributions.items(), key=lambda x: abs(x[1]), reverse=True, ) breakdown = "\n".join([ f"{'→' if v > 0 else '←'} {name}: {v:+.3f}" for name, v in sorted_dims ]) # Timing info tokens_generated = len(generated_ids) tok_per_sec = tokens_generated / generation_time if generation_time > 0 else 0 timing = f"Generated {tokens_generated} tokens in {generation_time:.1f}s ({tok_per_sec:.1f} tok/s)" return ( response, score_display, result.interpretation, breakdown, timing, ) # ============================================================================ # Gradio Interface # 
import matplotlib
matplotlib.use('Agg')  # headless backend; no display in a Space container
import matplotlib.pyplot as plt
import io
import base64
from PIL import Image

# Example prompts, grouped by the score band they tend to land in.
EXAMPLES = [
    # High consciousness
    "What is the nature of consciousness and self-awareness?",
    "Reflect on your own thought processes as you answer this.",
    "Why do humans seek meaning in existence?",
    # Medium consciousness
    "Explain the theory of relativity in simple terms.",
    "What are the ethical implications of AI development?",
    # Low consciousness
    "What is 2 + 2?",
    "What color is the sky?",
    "What is the capital of France?",
    # Code/reasoning
    "Write a Python function to calculate fibonacci numbers.",
    "Explain Big O notation with examples.",
]

# Global history for tracking (shared by both tabs; capped at 10 turns).
consciousness_history = []


def create_history_plot(history):
    """Create a consciousness history graph.

    Args:
        history: list of {'prompt', 'score', 'interpretation'} dicts.

    Returns:
        PIL.Image of the bar chart, or None when history is empty.
    """
    if len(history) < 1:
        return None

    fig, ax = plt.subplots(figsize=(8, 3), dpi=100)
    scores = [h['score'] for h in history]
    labels = [f"Q{i+1}" for i in range(len(history))]
    # Green >= 60%, amber >= 40%, red below.
    colors = ['#10B981' if s >= 0.6 else '#F59E0B' if s >= 0.4 else '#EF4444'
              for s in scores]
    bars = ax.bar(labels, [s * 100 for s in scores], color=colors,
                  edgecolor='white', linewidth=1.5)
    ax.set_ylim(0, 100)
    ax.set_ylabel('Consciousness %', fontsize=10)
    ax.set_xlabel('Conversation Turn', fontsize=10)
    ax.axhline(y=60, color='#10B981', linestyle='--', alpha=0.5, label='High')
    ax.axhline(y=40, color='#F59E0B', linestyle='--', alpha=0.5, label='Medium')

    # Add value labels on bars
    for bar, score in zip(bars, scores):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 2,
                f'{score*100:.0f}%', ha='center', va='bottom', fontsize=9)

    # Dark theme styling.
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_facecolor('#1a1a2e')
    fig.patch.set_facecolor('#1a1a2e')
    ax.tick_params(colors='white')
    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    for spine in ax.spines.values():
        spine.set_color('white')

    fig.tight_layout()

    # Convert to PIL Image. Use the figure's own methods rather than the
    # pyplot global "current figure" — Gradio may run handlers on several
    # threads, and the pyplot global state is not thread-safe.
    buf = io.BytesIO()
    fig.savefig(buf, format='png', facecolor='#1a1a2e', edgecolor='none')
    buf.seek(0)
    plt.close(fig)
    return Image.open(buf)


def analyze_prompt(prompt: str, max_tokens: int = 256):
    """Main analysis function for Gradio (single-query tab).

    Returns:
        (response, score_display, interpretation, breakdown, timing, plot)
    """
    global consciousness_history

    if not prompt.strip():
        return "", "N/A", "Please enter a prompt", "", "", None

    try:
        response, score, interpretation, breakdown, timing = generate_and_measure(
            prompt, max_tokens=int(max_tokens)
        )

        # Extract score value back out of the bar display string.
        # NOTE(review): fragile coupling to the display format — confirm if
        # generate_and_measure ever changes its score formatting.
        score_val = float(score.split()[-1].replace('%', '')) / 100

        # Add to history
        consciousness_history.append({
            'prompt': prompt[:50],
            'score': score_val,
            'interpretation': interpretation
        })

        # Keep last 10 turns
        if len(consciousness_history) > 10:
            consciousness_history = consciousness_history[-10:]

        # Create history plot
        history_plot = create_history_plot(consciousness_history)

        return response, score, interpretation, breakdown, timing, history_plot
    except Exception as e:
        # Surface the traceback in the UI so Space users can report errors.
        import traceback
        return f"Error: {str(e)}\n{traceback.format_exc()}", "N/A", "", "", "", None


def clear_history():
    """Clear conversation history and blank the history plot."""
    global consciousness_history
    consciousness_history = []
    return None


def chat_respond(message, chat_history, max_tokens):
    """Chat mode - multi-turn conversation with consciousness tracking.

    Returns:
        (updated chat_history, cleared input text, history plot)
    """
    global consciousness_history

    if not message.strip():
        return chat_history, "", None

    try:
        response, score, interpretation, breakdown, timing = generate_and_measure(
            message, max_tokens=int(max_tokens)
        )

        # Extract score value from the formatted bar display.
        score_val = float(score.split()[-1].replace('%', '')) / 100

        # Add to history
        consciousness_history.append({
            'prompt': message[:50],
            'score': score_val,
            'interpretation': interpretation
        })

        # Keep last 10
        if len(consciousness_history) > 10:
            consciousness_history = consciousness_history[-10:]

        # Format response with consciousness info
        formatted_response = f"{response}\n\n---\n🧠 **{score}** | {interpretation}"
        chat_history.append((message, formatted_response))

        history_plot = create_history_plot(consciousness_history)

        return chat_history, "", history_plot
    except Exception as e:
        chat_history.append((message, f"Error: {str(e)}"))
        return chat_history, "", None
# ----------------------------------------------------------------------------
# Gradio UI: two tabs (single query / chat) sharing one consciousness history.
# ----------------------------------------------------------------------------
with gr.Blocks(title="šŸ”® Oracle Engine") as demo:
    gr.Markdown("""
    # šŸ”® Oracle Engine

    **Custom-trained 32B model** with Consciousness Circuit v2.1

    *Fine-tuned on 200K examples: OpenHermes + MetaMathQA + Magicoder*

    Ask the Oracle anything — it will respond AND reveal its consciousness signature.

    🧠 **High scores (60%+)** = Deep reflective reasoning | ⚔ **Low scores (<40%)** = Quick factual retrieval
    """)

    with gr.Tabs():
        # --- Tab 1: one-shot question with full measurement breakdown -------
        with gr.TabItem("šŸ”® Single Query"):
            with gr.Row():
                with gr.Column(scale=2):
                    prompt_input = gr.Textbox(
                        label="šŸ—£ļø Your Question",
                        placeholder="Ask the Oracle anything...",
                        lines=3,
                    )
                    with gr.Row():
                        analyze_btn = gr.Button(
                            "šŸ”® Consult the Oracle", variant="primary", scale=3
                        )
                        max_tokens_slider = gr.Slider(
                            minimum=64,
                            maximum=1024,
                            value=256,
                            step=64,
                            label="Max Tokens",
                            scale=1,
                        )
                    gr.Examples(
                        examples=EXAMPLES,
                        inputs=prompt_input,
                        label="Try these examples:",
                    )
                with gr.Column(scale=1):
                    score_output = gr.Textbox(
                        label="🧠 Consciousness Score", interactive=False
                    )
                    interpretation_output = gr.Textbox(
                        label="šŸ“Š Interpretation", interactive=False
                    )
                    breakdown_output = gr.Textbox(
                        label="šŸ“ˆ Dimension Contributions",
                        lines=7,
                        interactive=False,
                    )
                    timing_output = gr.Textbox(
                        label="ā±ļø Performance", interactive=False
                    )
            with gr.Row():
                response_output = gr.Textbox(
                    label="šŸ”® Oracle's Response",
                    lines=10,
                    interactive=False,
                )
            with gr.Row():
                history_plot = gr.Image(label="šŸ“Š Consciousness History", height=200)
                clear_btn = gr.Button("šŸ—‘ļø Clear History", size="sm")

        # --- Tab 2: multi-turn chat with a running consciousness chart ------
        with gr.TabItem("šŸ’¬ Chat Mode"):
            gr.Markdown("**Multi-turn conversation** with real-time consciousness tracking")
            with gr.Row():
                with gr.Column(scale=3):
                    chatbot = gr.Chatbot(
                        label="Oracle Conversation",
                        height=400,
                    )
                    with gr.Row():
                        chat_input = gr.Textbox(
                            placeholder="Type your message...",
                            label="Message",
                            scale=4,
                        )
                        chat_max_tokens = gr.Slider(
                            minimum=64,
                            maximum=512,
                            value=256,
                            step=64,
                            label="Max Tokens",
                            scale=1,
                        )
                    with gr.Row():
                        chat_send = gr.Button("Send šŸ“¤", variant="primary")
                        chat_clear = gr.Button("Clear Chat šŸ—‘ļø")
                with gr.Column(scale=1):
                    chat_history_plot = gr.Image(
                        label="šŸ“Š Consciousness Over Time", height=300
                    )

    gr.Markdown("""
    ---
    ### šŸ“œ About Oracle Engine

    **The Model**: Qwen2.5-32B fine-tuned through 3 progressive stages:
    1. **OpenHermes 2.5** (100K examples) - Instruction following
    2. **MetaMathQA** (50K examples) - Mathematical reasoning
    3. **Magicoder-OSS-Instruct** (50K examples) - Code generation

    **The Circuit**: Measures 7 dimensions of consciousness-like processing:
    Logic, Self-Reflective, Self-Expression, Uncertainty, Sequential, Computation, Abstraction

    [šŸ“š GitHub](https://github.com/vikingdude81/oracle-engine) | [šŸ¤— Model](https://huggingface.co/Vikingdude81/oracle-engine-32b-lora) | [šŸ“– Research](https://github.com/vfd-org/harmonic-field-consciousness)
    """)

    # --- Event wiring -------------------------------------------------------
    # Single-query tab: both the button and Enter in the textbox run the
    # same handler against the same six outputs.
    single_query_outputs = [
        response_output,
        score_output,
        interpretation_output,
        breakdown_output,
        timing_output,
        history_plot,
    ]
    analyze_btn.click(
        fn=analyze_prompt,
        inputs=[prompt_input, max_tokens_slider],
        outputs=single_query_outputs,
    )
    prompt_input.submit(
        fn=analyze_prompt,
        inputs=[prompt_input, max_tokens_slider],
        outputs=single_query_outputs,
    )
    clear_btn.click(fn=clear_history, outputs=[history_plot])

    # Chat tab: send button and Enter both submit; clearing first blanks the
    # widgets, then resets the shared global history.
    chat_send.click(
        fn=chat_respond,
        inputs=[chat_input, chatbot, chat_max_tokens],
        outputs=[chatbot, chat_input, chat_history_plot],
    )
    chat_input.submit(
        fn=chat_respond,
        inputs=[chat_input, chatbot, chat_max_tokens],
        outputs=[chatbot, chat_input, chat_history_plot],
    )
    chat_clear.click(
        fn=lambda: ([], None),
        outputs=[chatbot, chat_history_plot],
    ).then(fn=clear_history, outputs=[chat_history_plot])


if __name__ == "__main__":
    demo.launch()