File size: 20,524 Bytes

e993aba


"""
engine.py
Orquestador principal del motor Savant Simbiótico RRF.
Expone:
- handle_query(text): detecta intención (map/resonance/music/chat) y responde
- access to SimpleTrainer, SelfImprover, MemoryStore for external control
"""
import time # Import time
from .mappings import IcosaMap, DodecaMap
from .resonance import ResonanceSimulator
from .music import MusicAdapter
from .memory import MemoryStore
from .self_improvement import SelfImprover
# from .trainer import SimpleTrainer # Avoid circular import, trainer can be instantiated externally
from .api_helpers import chat_refine
import os # Import os
import pandas as pd # Import pandas
import json # Import json
import pickle # Import pickle

class SavantEngine:
    def __init__(self, structured_data_paths=None):
        """Create the memory store, load optional structured data, build components.

        Args:
            structured_data_paths: Optional mapping of file paths. The keys
                'equations' and 'icosahedron_nodes' are read as JSON, the keys
                'frequencies' and 'constants' as CSV. Any exception raised
                while loading is printed and leaves ``self.structured_data``
                as an empty dict.
        """
        self.memory = MemoryStore("SAVANT_memory.jsonl")
        self.structured_data = {}

        if structured_data_paths:
            print("Engine: Loading structured data...")
            try:
                loaded = self.structured_data
                path_of = structured_data_paths.get

                loaded['equations'] = self._load_json_data(path_of('equations'))

                # The node file is only used when it is a JSON object; its
                # 'nodes' entry is kept and anything else becomes an empty list.
                raw_nodes = self._load_json_data(path_of('icosahedron_nodes'))
                if isinstance(raw_nodes, dict):
                    loaded['icosahedron_nodes'] = raw_nodes.get('nodes', [])
                else:
                    loaded['icosahedron_nodes'] = []

                loaded['frequencies'] = self._load_csv_data(path_of('frequencies'))
                loaded['constants'] = self._load_csv_data(path_of('constants'))

                equations = loaded['equations']
                summary = "Engine loaded structured data: Equations={}, Nodes={}, Frequencies={}, Constants={}".format(
                    len(equations) if equations else 0,
                    len(loaded['icosahedron_nodes']),
                    len(loaded['frequencies']),
                    len(loaded['constants']),
                )
                print(summary)
            except Exception as e:
                print(f"Engine: Error loading structured data: {e}")
                # Discard partially loaded data so components see a clean slate.
                self.structured_data = {}

        # Components receive whatever structured data was loaded (None when absent).
        data = self.structured_data
        frequencies = data.get('frequencies')
        self.icosa = IcosaMap(node_data=data.get('icosahedron_nodes'))
        self.dodeca = DodecaMap()
        self.resonator = ResonanceSimulator(
            frequencies_data=frequencies,
            constants_data=data.get('constants'),
        )
        self.music = MusicAdapter(frequencies_data=frequencies)
        self.self_improver = SelfImprover(self.memory, structured_data=data)

        # Counter used by _log_interaction to trigger self-improvement cycles.
        self._interaction_count = 0


    # Helper methods for loading data within the Engine (copied from Trainer for self-containment)
    def _load_json_data(self, file_path):
        """Loads data from a JSON file."""
        if not file_path or not os.path.exists(file_path):
            # print(f"JSON file not found or path not provided: {file_path}") # Suppress not found for optional files
            return None
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            # print(f"Successfully loaded JSON data from {file_path}") # Suppress success for cleaner output
            return data
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from {file_path}: {e}")
            return None
        except Exception as e:
            print(f"An unexpected error occurred while loading JSON data: {e}")
            return None

    def _load_csv_data(self, file_path):
        """Loads data from a CSV file using pandas."""
        if not file_path or not os.path.exists(file_path):
             # print(f"CSV file not found or path not provided: {file_path}") # Suppress not found for optional files
             return []
        try:
            df = pd.read_csv(file_path)
            # print(f"Successfully loaded CSV data from {file_path}") # Suppress success for cleaner output
            return df.to_dict(orient='records')
        except Exception as e:
            print(f"An error occurred while loading CSV data from {file_path}: {e}")
            return []


    def _classify(self, text):
        t = text.lower()
        # Enhanced classification based on structured data keywords and patterns
        if any(k in t for k in ("equation", "ecuacion", "hamiltoniano", "dirac", "formula", "formulae", "formulas")): # Added formula variations
             return "equation_query"
        if any(k in t for k in ("node", "nodo", "icosahedron", "dodecahedron", "poly", "vertex", "point", "map")): # Added map keyword to node query
             # Check for patterns like "node X" where X is a number
             words = t.split()
             if len(words) > 1 and words[-1].isdigit() and words[-2] in ("node", "nodo"):
                  return "node_query"
             return "node_query"
        if any(k in t for k in ("frecuen", "freq", "music", "nota", "melod", "tono", "pitch", "scale", "musical", "sound", "audio")): # Added sound, audio
             return "music_resonance" # Combine music and resonance intent for simplicity here
        if any(k in t for k in ("constant", "constante", "valor", "unidad", "define", "what is the value of")): # Added "what is the value of"
             return "constant_query"
        if any(k in t for k in ("resonance", "resonar", "resonant", "vibration", "oscilla")): # Specific keywords for resonance without music
             return "resonance_only"

        # Existing classifications (kept as fallbacks or for broader terms)
        # Removed redundant 'reson' and 'sinton' mapping to music_resonance as specific resonance_only added
        if any(k in t for k in ("chat", "hola", "qué", "como", "explica", "tell me", "what is", "describe", "info", "information")): # Added info, information
            return "chat"
        return "chat" # Default to chat


    def handle_query(self, text, base_model_output=None):
        kind = self._classify(text)

        # Handle query types based on structured data
        if kind == "equation_query":
             relevant_eqs = []
             if self.structured_data.get('equations'):
                  # Find equations related to the query (more robust keyword matching)
                  query_words = text.lower().split()
                  relevant_eqs = [eq for eq in self.structured_data['equations'] if any(word in eq.get('nombre', '').lower() or word in eq.get('descripcion', '').lower() or any(comp.lower() in word for comp in eq.get('componentes', [])) for word in query_words)]

             if relevant_eqs:
                  # Provide information about found equations
                  response_parts = ["Based on the RRF Equations data, I found the following relevant equations:"]
                  for eq in relevant_eqs[:3]: # Limit to first 3 for brevity
                      response_parts.append(f"- '{eq.get('nombre', 'N/A')}' ({eq.get('tipo', 'Equation')}): {eq.get('ecuacion', 'N/A')} (Components: {', '.join(eq.get('componentes', []))})")
                  if len(relevant_eqs) > 3:
                       response_parts.append("...")
                  response = "\n".join(response_parts)
                  self._log_interaction(text, base_model_output, response, type="equation_query")
                  return {"type": "equation_query", "query": text, "result": relevant_eqs, "response": response}
             else:
                  response = "I couldn't find any relevant equations in the loaded data for that query."
                  self._log_interaction(text, base_model_output, response, type="equation_query_not_found")
                  return {"type": "equation_query", "query": text, "result": [], "response": response}


        if kind == "node_query":
             relevant_nodes = []
             if self.structured_data.get('icosahedron_nodes'):
                  query_words = text.lower().split()
                  # Try to find by ID first if query contains a number
                  try:
                       node_id = int(query_words[-1]) if query_words and query_words[-1].isdigit() else None
                       if node_id is not None:
                            relevant_nodes = [node for node in self.structured_data['icosahedron_nodes'] if node.get('id') == node_id]
                  except (ValueError, IndexError):
                        pass # Not a number query

                  # If not found by ID or not a number query, search by keyword in description/name
                  if not relevant_nodes:
                       relevant_nodes = [node for node in self.structured_data['icosahedron_nodes'] if any(word in node.get('description', '').lower() or word in node.get('name', '').lower() for word in query_words)]

             if relevant_nodes:
                  response_parts = ["Based on the Icosahedron Nodes data, I found the following relevant nodes:"]
                  for node in relevant_nodes[:3]: # Limit to first 3
                       response_parts.append(f"- Node {node.get('id', 'N/A')}: {node.get('description', node.get('name', 'No description'))} (Coords: ({node.get('x', 'N/A')}, {node.get('y', 'N/A')}, {node.get('z', 'N/A')}))") # Added N/A checks
                  if len(relevant_nodes) > 3:
                       response_parts.append("...")
                  response = "\n".join(response_parts)
                  self._log_interaction(text, base_model_output, response, type="node_query")
                  return {"type": "node_query", "query": text, "result": relevant_nodes, "response": response}
             else:
                  response = "I couldn't find any relevant nodes in the loaded data for that query."
                  self._log_interaction(text, base_model_output, response, type="node_query_not_found")
                  return {"type": "node_query", "query": text, "result": [], "response": response}

        if kind == "music_resonance":
            # Can still trigger resonance simulation and music adaptation
            # Enhance response with information from frequencies/constants if relevant keywords are used
            response_parts = []
            if self.structured_data.get('frequencies') and any(k in text.lower() for k in ("frecuen", "freq", "nota", "pitch", "scale", "musical", "sound", "audio")):
                 query_words = text.lower().split()
                 relevant_freqs = [f for f in self.structured_data['frequencies'] if any(word in f.get('note', '').lower() or word in f.get('role', '').lower() for word in query_words)]
                 if relevant_freqs:
                      response_parts.append("Based on the Frequencies data, I found:")
                      for freq in relevant_freqs[:3]:
                           response_parts.append(f"- Note: {freq.get('note', 'N/A')}, Frequency: {freq.get('frequency', 'N/A')} Hz, Role: {freq.get('role', 'N/A')}") # Added N/A checks
                      if len(relevant_freqs) > 3: response_parts.append("...")

            if self.structured_data.get('constants') and any(k in text.lower() for k in ("constant", "constante")):
                 query_words = text.lower().split()
                 relevant_constants = [c for c in self.structured_data['constants'] if any(word in c.get('name', '').lower() for word in query_words)]
                 if relevant_constants:
                      response_parts.append("Based on the Constants data, I found:")
                      for const in relevant_constants[:3]:
                           response_parts.append(f"- Constant: {const.get('name', 'N/A')}, Value: {const.get('value', 'N/A')}, Units: {const.get('units', 'N/A')}") # Added N/A checks
                      if len(relevant_constants) > 3: response_parts.append("...")

            # Always run resonance simulation and music adaptation for this type
            r = self.resonator.simulate(text)
            seq = self.music.adapt_text_to_music(text)

            response_parts.append(f"Resonance simulation summary: Dominant Frequency={r['summary'].get('dom_freq', 0.0):.4f} Hz, Max Power={r['summary'].get('max_power', 0.0):.4f}.") # Added default values
            response_parts.append(f"Adapted to music sequence (first 5 notes: pitch, duration): {seq[:5]}...")

            response = "\n".join(response_parts) if response_parts else "Processing music and resonance query..."
            self._log_interaction(text, base_model_output, response, type="music_resonance")
            return {"type":"music_resonance","query":text,"resonance_result":r,"music_result":seq, "response": response}

        if kind == "resonance_only": # New handler for resonance-only queries
             # Can still trigger resonance simulation
            response_parts = []
            if self.structured_data.get('constants') and any(k in text.lower() for k in ("constant", "constante")):
                 query_words = text.lower().split()
                 relevant_constants = [c for c in self.structured_data['constants'] if any(word in c.get('name', '').lower() for word in query_words)]
                 if relevant_constants:
                      response_parts.append("Based on the Constants data, I found:")
                      for const in relevant_constants[:3]:
                           response_parts.append(f"- Constant: {const.get('name', 'N/A')}, Value: {const.get('value', 'N/A')}, Units: {const.get('units', 'N/A')}") # Added N/A checks
                      if len(relevant_constants) > 3: response_parts.append("...")

            r = self.resonator.simulate(text)
            response_parts.append(f"Resonance simulation summary: Dominant Frequency={r['summary'].get('dom_freq', 0.0):.4f} Hz, Max Power={r['summary'].get('max_power', 0.0):.4f}.") # Added default values

            response = "\n".join(response_parts) if response_parts else "Processing resonance query..."
            self._log_interaction(text, base_model_output, response, type="resonance_only")
            return {"type":"resonance_only","query":text,"resonance_result":r, "response": response}


        if kind == "constant_query":
             relevant_constants = []
             if self.structured_data.get('constants'):
                  query_words = text.lower().split()
                  relevant_constants = [c for c in self.structured_data['constants'] if any(word in c.get('name', '').lower() or word in c.get('units', '').lower() for word in query_words)]

             if relevant_constants:
                  response_parts = ["Based on the RRF Constants data, I found the following relevant constants:"]
                  for const in relevant_constants[:3]:
                       response_parts.append(f"- Name: {const.get('name', 'N/A')}, Value: {const.get('value', 'N/A')}, Units: {const.get('units', 'N/A')}") # Added N/A checks
                  if len(relevant_constants) > 3: response_parts.append("...")
                  response = "\n".join(response_parts)
                  self._log_interaction(text, base_model_output, response, type="constant_query")
                  return {"type": "constant_query", "query": text, "result": relevant_constants, "response": response}
             else:
                  response = "I couldn't find any relevant constants in the loaded data for that query."
                  self._log_interaction(text, base_model_output, response, type="constant_query_not_found")
                  return {"type": "constant_query", "query": text, "result": [], "response": response}


        if kind == "map":
             # Use icosahedron_nodes data in mapping (already done in IcosaMap)
            node_label = self.icosa.closest_node(text)
            response = f"Mapping query '{text}' to closest node: {node_label}"
            # If we have node data, try to find details about the mapped node
            if self.structured_data.get('icosahedron_nodes'):
                 # Assuming node_label is the description or name from node_data used for embedding
                 # A more robust mapping is needed here to link label back to original node dict by ID
                 # For now, let's just find the node with a matching description/name if possible
                 mapped_node_data = next((node for node in self.structured_data['icosahedron_nodes'] if node.get('description', '').lower() == node_label.lower() or node.get('name', '').lower() == node_label.lower()), None)
                 if mapped_node_data:
                      response += f" (ID: {mapped_node_data.get('id', 'N/A')}, Coords: ({mapped_node_data.get('x', 'N/A')}, {mapped_node_data.get('y', 'N/A')}, {mapped_node_data.get('z', 'N/A')}))" # Added N/A checks


            self._log_interaction(text, base_model_output, response, type="map")
            return {"type":"map","query":text,"node":node_label, "response": response}

        # chat fallback: if base_model_output provided, refine it using self_improver
        if kind == "chat":
            if base_model_output is None:
                # default echo
                base = "Echo: " + text
            else:
                base = base_model_output

            refined = chat_refine(text, base, self_improver=self.self_improver)
            response = refined # Use refined output as the main response for chat
            self._log_interaction(text, base_model_output, refined, type="chat_interaction") # Log chat interaction

            return {"type":"chat","query":text,"base":base,"refined":refined, "response": response}

        # Fallback for unhandled types (shouldn't be reached with current classify)
        response = "I'm not sure how to handle that query based on the available data and functions."
        self._log_interaction(text, base_model_output, response, type="unhandled_query")
        return {"type": "unhandled", "query": text, "response": response}


    def _log_interaction(self, user_input, base_output, final_output, type="interaction"):
         """Logs interaction details to memory and triggers self-improvement if needed."""
         interaction_record = {
             "type": type, # Use the specified type (e.g., chat_interaction, equation_query)
             "user_input": user_input,
             "base_model_output": base_output, # Might be None for non-chat types
             "final_output": final_output, # The response generated by handle_query
             "_ts": time.time() # Add timestamp
         }
         self.memory.add(interaction_record)

         # Periodically trigger self-improvement (e.g., every 10 interactions)
         self._interaction_count = getattr(self, '_interaction_count', 0) + 1
         if self._interaction_count % 10 == 0:
             print("SAVANT: Triggering self-improvement cycle...")
             try:
                 proposal = self.self_improver.propose()
                 accepted, metric = self.self_improver.evaluate_and_apply(proposal)
                 print(f"SAVANT: Self-improvement proposal accepted: {accepted}, New metric: {metric}")
                 self.memory.add({
                     "type": "self_improvement_triggered",
                     "proposal": proposal,
                     "accepted": accepted,
                     "metric": metric,
                     "_ts": time.time()
                 })
             except Exception as si_error:
                     # Log the error and continue
                     error_message = f"Error during self-improvement: {si_error}"
                     print(f"SAVANT: {error_message}")
                     self.memory.add({
                         "type": "self_improvement_error",
                         "error": error_message,
                         "_ts": time.time()
                     })


    # trainer helpers (these are now called externally via SimpleTrainer instance)
    # def run_training_epochs(self, stimuli, epochs=3):
    #     return self.trainer.run_epochs(stimuli, epochs)

    def propose_improvement(self):
        return self.self_improver.propose()

    def apply_improvement(self, proposal):
        return self.self_improver.evaluate_and_apply(proposal)