/**
 * LlamaCppLLM - node-llama-cpp wrapper as a Runnable
 *
 * @module llm/llama-cpp-llm
 */
import { Runnable } from '../core/runnable.js';
import { AIMessage, HumanMessage } from '../core/message.js';
import { getLlama, LlamaChatSession } from 'node-llama-cpp';

/**
 * LlamaCppLLM - A Runnable wrapper for node-llama-cpp
 *
 * Wraps your LLM calls from agent fundamentals into a reusable,
 * composable Runnable component.
 *
 * Key benefits over raw node-llama-cpp:
 * - Composable with other Runnables via .pipe()
 * - Supports batch processing of multiple inputs
 * - Built-in streaming support
 * - Consistent interface across all LLMs
 * - Easy to swap with other LLM providers
 */
export class LlamaCppLLM extends Runnable {
  /**
   * Create a new LlamaCppLLM instance
   *
   * @param {Object} options - Configuration options
   * @param {string} options.modelPath - Path to your GGUF model file (REQUIRED)
   * @param {number} [options.temperature=0.7] - Sampling temperature (0-1)
   *   - Lower (0.1): More focused, deterministic
   *   - Higher (0.9): More creative, random
   * @param {number} [options.topP=0.9] - Nucleus sampling threshold
   * @param {number} [options.topK=40] - Top-K sampling parameter
   * @param {number} [options.maxTokens=2048] - Maximum tokens to generate
   * @param {number} [options.repeatPenalty=1.1] - Penalty for repeating tokens
   * @param {number} [options.contextSize=4096] - Context window size
   * @param {number} [options.batchSize=512] - Batch processing size
   * @param {boolean} [options.verbose=false] - Enable debug logging
   * @param {string[]} [options.stopStrings] - Strings that stop generation
   * @param {Object} [options.chatWrapper] - Custom chat wrapper instance (e.g., QwenChatWrapper)
   *   - If not provided, the library will automatically select the best wrapper for your model
   *
   * @example Basic Setup
   * ```javascript
   * const llm = new LlamaCppLLM({
   *   modelPath: './models/Meta-Llama-3.1-8B-Instruct-Q5_K_S.gguf',
   *   temperature: 0.7
   * });
   * ```
   *
   * @example With Qwen Chat Wrapper (Discourage Thoughts)
   * ```javascript
   * import { QwenChatWrapper } from 'node-llama-cpp';
   *
   * const llm = new LlamaCppLLM({
   *   modelPath: './models/Qwen3-1.7B-Q6_K.gguf',
   *   temperature: 0.7,
   *   chatWrapper: new QwenChatWrapper({
   *     thoughts: 'discourage'
   *   })
   * });
   * ```
   *
   * @example Different Configurations for Different Tasks
   * ```javascript
   * // Creative writing (higher temperature)
   * const creative = new LlamaCppLLM({
   *   modelPath: './model.gguf',
   *   temperature: 0.9,
   *   maxTokens: 1000
   * });
   *
   * // Factual responses (lower temperature)
   * const factual = new LlamaCppLLM({
   *   modelPath: './model.gguf',
   *   temperature: 0.1,
   *   maxTokens: 500
   * });
   * ```
   */
  constructor(options = {}) {
    super();

    // Validate required options
    this.modelPath = options.modelPath;
    if (!this.modelPath) {
      throw new Error(
        'modelPath is required. Example: new LlamaCppLLM({ modelPath: "./model.gguf" })'
      );
    }

    // Generation parameters
    // These control how the LLM generates text - same as in your fundamentals!
    this.temperature = options.temperature ?? 0.7;
    this.topP = options.topP ?? 0.9;
    this.topK = options.topK ?? 40;
    this.maxTokens = options.maxTokens ?? 2048;
    this.repeatPenalty = options.repeatPenalty ?? 1.1;

    // Context configuration
    this.contextSize = options.contextSize ?? 4096;
    this.batchSize = options.batchSize ?? 512;

    // Behavior
    this.verbose = options.verbose ?? false;

    // Chat wrapper configuration
    // If not provided, LlamaChatSession will auto-select the best wrapper
    this.chatWrapper = options.chatWrapper ?? 'auto';

    // Stop strings - when the model sees these, it stops generating
    // Default includes common chat separators
    this.stopStrings = options.stopStrings ?? [
      'Human:',
      'User:',
      '\n\nHuman:',
      '\n\nUser:'
    ];

    // Internal state (lazy initialized)
    this._llama = null;
    this._model = null;
    this._context = null;
    this._chatSession = null;
    this._initialized = false;
  }

  /**
   * Initialize model (lazy loading)
   *
   * This loads the model only when first needed, not at construction.
   * This pattern is useful because model loading is slow - we only
   * want to do it once and only when we actually need it.
   *
   * @private
   * @throws {Error} If model loading fails
   */
  async _initialize() {
    // Skip if already initialized
    if (this._initialized) return;

    if (this.verbose) {
      console.log(`Loading model: ${this.modelPath}`);
    }

    try {
      // Step 1: Get the llama instance
      this._llama = await getLlama();

      // Step 2: Load the model file
      this._model = await this._llama.loadModel({
        modelPath: this.modelPath
      });

      // Step 3: Create a context for generation
      this._context = await this._model.createContext({
        contextSize: this.contextSize,
        batchSize: this.batchSize
      });

      // Step 4: Create a chat session
      // This manages conversation history for us
      const contextSequence = this._context.getSequence();
      const sessionConfig = { contextSequence };

      // Add chatWrapper if specified (otherwise LlamaChatSession uses "auto")
      if (this.chatWrapper !== 'auto') {
        sessionConfig.chatWrapper = this.chatWrapper;
      }

      this._chatSession = new LlamaChatSession(sessionConfig);

      this._initialized = true;

      if (this.verbose) {
        console.log('✓ Model loaded successfully');
        if (this.chatWrapper !== 'auto') {
          console.log(`✓ Using custom chat wrapper: ${this.chatWrapper.constructor.name}`);
        } else {
          console.log('✓ Using auto-detected chat wrapper');
        }
      }
    } catch (error) {
      throw new Error(
        `Failed to initialize model at ${this.modelPath}: ${error.message}`
      );
    }
  }
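
  /*
   * Lazy-loading sketch (illustrative only, not part of the class): the first
   * invoke() call pays the model-loading cost via _initialize(); every later
   * call reuses the already-loaded model and context. The model path below is
   * a placeholder - point it at a real GGUF file on disk.
   *
   *   const llm = new LlamaCppLLM({ modelPath: './models/model.gguf', verbose: true });
   *   await llm.invoke('First call');  // slow: loads the model, then generates
   *   await llm.invoke('Second call'); // fast path: model already loaded
   */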

  /**
   * Convert our Message objects to node-llama-cpp chat history format
   *
   * This bridges between our standardized Message types and what
   * node-llama-cpp expects. Think of it as a translator.
   *
   * @private
   * @param {Array} messages - Array of Message objects
   * @returns {Array} Chat history in llama.cpp format
   *
   * @example
   * ```javascript
   * // Input: Our messages
   * [
   *   new SystemMessage("You are helpful"),
   *   new HumanMessage("Hi"),
   *   new AIMessage("Hello!")
   * ]
   *
   * // Output: llama.cpp format
   * [
   *   { type: 'system', text: 'You are helpful' },
   *   { type: 'user', text: 'Hi' },
   *   { type: 'model', response: 'Hello!' }
   * ]
   * ```
   */
  _messagesToChatHistory(messages) {
    return messages.map(msg => {
      // System messages: instructions for the AI
      if (msg._type === 'system') {
        return { type: 'system', text: msg.content };
      }
      // Human messages: user input
      else if (msg._type === 'human') {
        return { type: 'user', text: msg.content };
      }
      // AI messages: previous AI responses
      else if (msg._type === 'ai') {
        return { type: 'model', response: msg.content };
      }
      // Tool messages: results from tool execution
      else if (msg._type === 'tool') {
        // Convert tool results to system messages
        return { type: 'system', text: `Tool Result: ${msg.content}` };
      }
      // Fallback: treat unknown types as user messages
      return { type: 'user', text: msg.content };
    });
  }

  /**
   * Clean up model response
   *
   * Sometimes models include extra prefixes or suffixes.
   * This cleans them up for a better user experience.
   *
   * @private
   * @param {string} response - Raw model response
   * @returns {string} Cleaned response
   *
   * @example
   * ```javascript
   * // Before: "Assistant: The answer is 42\n\nHuman: "
   * // After: "The answer is 42"
   * ```
   */
  _cleanResponse(response) {
    let cleaned = response.trim();

    // Remove "Assistant:" or "AI:" prefixes
    cleaned = cleaned.replace(/^(Assistant|AI):\s*/i, '');

    // Remove any conversation continuations
    cleaned = cleaned.replace(/\n\n(Human|User):.*$/s, '');

    return cleaned.trim();
  }

  /**
   * Main generation method - this is where your LLM calls happen!
   *
   * This is the same as calling `llm.chat(messages)` in your fundamentals,
   * but wrapped to work with the Runnable interface.
   *
   * @async
   * @param {string|Array} input - User input or message array
   * @param {Object} [config={}] - Runtime configuration
   * @param {number} [config.temperature] - Override temperature for this call
   * @param {number} [config.maxTokens] - Override max tokens for this call
   * @param {boolean} [config.clearHistory=false] - Clear chat history before this call
   * @returns {Promise<AIMessage>} Generated response as AIMessage
   *
   * @example String Input (Simplest)
   * ```javascript
   * const response = await llm.invoke("What is AI?");
   * console.log(response.content); // "AI is..."
   * ```
   *
   * @example Message Array Input (Full Control)
   * ```javascript
   * const messages = [
   *   new SystemMessage("You are a helpful assistant"),
   *   new HumanMessage("What is AI?")
   * ];
   * const response = await llm.invoke(messages);
   * ```
   *
   * @example Runtime Configuration
   * ```javascript
   * // Override temperature for this specific call
   * const response = await llm.invoke(
   *   "Write a creative story",
   *   { temperature: 0.9, maxTokens: 500 }
   * );
   * ```
   *
   * @example Clear History Before Call
   * ```javascript
   * // Ensure fresh context with no prior conversation
   * const response = await llm.invoke(
   *   "What is AI?",
   *   { clearHistory: true }
   * );
   * ```
   *
   * @example In a Pipeline (Composition)
   * ```javascript
   * const pipeline = promptFormatter
   *   .pipe(llm)
   *   .pipe(outputParser);
   *
   * const result = await pipeline.invoke("user input");
   * ```
   */
  async _call(input, config = {}) {
    // Ensure model is loaded (only happens once)
    await this._initialize();

    // Clear history if requested (important for batch processing)
    if (config.clearHistory) {
      this._chatSession.setChatHistory([]);
    }

    // Handle different input types
    let messages;
    if (typeof input === 'string') {
      messages = [new HumanMessage(input)];
    } else if (Array.isArray(input)) {
      messages = input;
    } else {
      throw new Error(
        'Input must be a string or array of messages. ' +
        'Example: "Hello" or [new HumanMessage("Hello")]'
      );
    }

    // Extract system message if present
    const systemMessages = messages.filter(msg => msg._type === 'system');
    const systemPrompt = systemMessages.length > 0 ? systemMessages[0].content : '';

    // Convert our Message objects to llama.cpp format
    const chatHistory = this._messagesToChatHistory(messages);
    this._chatSession.setChatHistory(chatHistory);

    // ALWAYS set system prompt (either new value or empty string to clear)
    this._chatSession.systemPrompt = systemPrompt;

    try {
      // Build prompt options
      const promptOptions = {
        temperature: config.temperature ?? this.temperature,
        topP: config.topP ?? this.topP,
        topK: config.topK ?? this.topK,
        maxTokens: config.maxTokens ?? this.maxTokens,
        repeatPenalty: config.repeatPenalty ?? this.repeatPenalty,
        customStopTriggers: config.stopStrings ?? this.stopStrings
      };

      // Add random seed if temperature > 0 and no seed specified
      // This ensures randomness works properly
      if (promptOptions.temperature > 0 && config.seed === undefined) {
        promptOptions.seed = Math.floor(Math.random() * 1000000);
      } else if (config.seed !== undefined) {
        promptOptions.seed = config.seed;
      }

      // Generate response using prompt (simpler than promptWithMeta for non-streaming)
      const response = await this._chatSession.prompt('', promptOptions);

      // Return as AIMessage for consistency
      return new AIMessage(response);
    } catch (error) {
      throw new Error(`Generation failed: ${error.message}`);
    }
  }
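
  /*
   * Seed behaviour sketch (illustrative only): with no explicit seed and
   * temperature > 0, each call gets a fresh random seed, so outputs vary run
   * to run. Passing the same seed with identical sampling settings should make
   * runs repeatable, assuming the underlying sampler honours the seed option.
   *
   *   const a = await llm.invoke('Pick a random animal', { seed: 42, clearHistory: true });
   *   const b = await llm.invoke('Pick a random animal', { seed: 42, clearHistory: true });
   *   // a.content and b.content are expected to match
   */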

  /**
   * Batch processing with history isolation
   *
   * Processes multiple inputs sequentially, ensuring each gets a clean chat history.
   * Note: Local models process requests sequentially, so there's no performance
   * benefit compared to calling invoke() multiple times.
   *
   * @async
   * @param {Array<string|Array>} inputs - Array of inputs to process
   * @param {Object} [config={}] - Runtime configuration
   * @returns {Promise<Array<AIMessage>>} Array of generated responses
   *
   * @example
   * ```javascript
   * const questions = ["What is AI?", "What is ML?", "What is DL?"];
   * const answers = await llm.batch(questions);
   * ```
   */
  async batch(inputs, config = {}) {
    const results = [];

    for (const input of inputs) {
      // Clear history before each batch item to prevent contamination
      const result = await this._call(input, { ...config, clearHistory: true });
      results.push(result);
    }

    return results;
  }
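
  /*
   * Equivalence sketch (illustrative only): batch() is a convenience over a
   * sequential loop of invoke() calls with clearHistory enabled, so each item
   * is answered without seeing the previous items.
   *
   *   const answers = [];
   *   for (const question of ["What is AI?", "What is ML?"]) {
   *     answers.push(await llm.invoke(question, { clearHistory: true }));
   *   }
   *   // Roughly the same result (and the same cost) as:
   *   // await llm.batch(["What is AI?", "What is ML?"])
   */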

  /**
   * Streaming generation - show results as they're generated!
   *
   * This is the same as _call() but yields chunks as they arrive,
   * like the typing effect you see in ChatGPT.
   *
   * @async
   * @generator
   * @param {string|Array} input - User input or message array
   * @param {Object} [config={}] - Runtime configuration
   * @yields {AIMessage} Chunks of generated text
   *
   * @example Basic Streaming
   * ```javascript
   * console.log("Response: ");
   * for await (const chunk of llm.stream("Tell me a story")) {
   *   process.stdout.write(chunk.content); // Print without newline
   * }
   * console.log("\nDone!");
   * ```
   *
   * @example Streaming in a Pipeline
   * ```javascript
   * const pipeline = promptFormatter
   *   .pipe(llm)
   *   .pipe(parser);
   *
   * // Only the last step (parser) gets streamed chunks
   * for await (const chunk of pipeline.stream(input)) {
   *   console.log(chunk);
   * }
   * ```
   *
   * @example Building a Chat UI
   * ```javascript
   * async function streamToUI(input) {
   *   let fullResponse = '';
   *
   *   for await (const chunk of llm.stream(input)) {
   *     fullResponse += chunk.content;
   *     updateUI(fullResponse); // Update your UI in real-time
   *   }
   * }
   * ```
   */
  async* stream(input, config = {}) {
    await this._initialize();

    // Clear history if requested
    if (config.clearHistory) {
      this._chatSession.setChatHistory([]);
    }

    // Handle different input types (same as _call)
    let messages;
    if (typeof input === 'string') {
      messages = [new HumanMessage(input)];
    } else if (Array.isArray(input)) {
      messages = input;
    } else {
      throw new Error(
        'Input must be a string or array of messages for streaming'
      );
    }

    // Extract system message if present
    const systemMessages = messages.filter(msg => msg._type === 'system');
    const systemPrompt = systemMessages.length > 0 ? systemMessages[0].content : '';

    // Set up chat history
    const chatHistory = this._messagesToChatHistory(messages);
    this._chatSession.setChatHistory(chatHistory);

    // ALWAYS set system prompt (either new value or empty string to clear)
    this._chatSession.systemPrompt = systemPrompt;

    try {
      // Build prompt options
      const promptOptions = {
        temperature: config.temperature ?? this.temperature,
        topP: config.topP ?? this.topP,
        topK: config.topK ?? this.topK,
        maxTokens: config.maxTokens ?? this.maxTokens,
        repeatPenalty: config.repeatPenalty ?? this.repeatPenalty,
        customStopTriggers: config.stopStrings ?? this.stopStrings
      };

      // Add random seed if temperature > 0 and no seed specified
      if (promptOptions.temperature > 0 && config.seed === undefined) {
        promptOptions.seed = Math.floor(Math.random() * 1000000);
      } else if (config.seed !== undefined) {
        promptOptions.seed = config.seed;
      }

      // Use onTextChunk callback to stream chunks as they arrive
      const self = this;
      promptOptions.onTextChunk = (chunk) => {
        // This callback is synchronous, so we can't yield directly
        // We'll collect chunks and yield them after
        self._currentStreamChunks = self._currentStreamChunks || [];
        self._currentStreamChunks.push(chunk);
      };

      // Initialize chunk collection
      this._currentStreamChunks = [];

      // Start generation (this will call onTextChunk as it generates)
      const responsePromise = this._chatSession.prompt('', promptOptions);

      // Yield chunks as they become available
      let lastYieldedIndex = 0;

      // Poll for new chunks
      while (true) {
        // Yield any new chunks
        while (lastYieldedIndex < this._currentStreamChunks.length) {
          yield new AIMessage(this._currentStreamChunks[lastYieldedIndex], {
            additionalKwargs: { chunk: true }
          });
          lastYieldedIndex++;
        }

        // Check if generation is complete
        const isDone = await Promise.race([
          responsePromise.then(() => true),
          new Promise(resolve => setTimeout(() => resolve(false), 10))
        ]);

        if (isDone) {
          // Yield any remaining chunks
          while (lastYieldedIndex < this._currentStreamChunks.length) {
            yield new AIMessage(this._currentStreamChunks[lastYieldedIndex], {
              additionalKwargs: { chunk: true }
            });
            lastYieldedIndex++;
          }
          break;
        }
      }

      // Wait for the full response
      await responsePromise;

      // Clean up
      delete this._currentStreamChunks;
    } catch (error) {
      throw new Error(`Streaming failed: ${error.message}`);
    }
  }

  /**
   * Cleanup resources
   *
   * LLMs hold resources in memory. Call this when you're done
   * to free them up properly.
   *
   * @async
   * @returns {Promise<void>}
   *
   * @example
   * ```javascript
   * const llm = new LlamaCppLLM({ modelPath: './model.gguf' });
   *
   * try {
   *   const response = await llm.invoke("Hello");
   *   console.log(response.content);
   * } finally {
   *   await llm.dispose(); // Always clean up!
   * }
   * ```
   *
   * @example With Multiple Uses
   * ```javascript
   * const llm = new LlamaCppLLM({ modelPath: './model.gguf' });
   *
   * // Use it many times
   * await llm.invoke("Question 1");
   * await llm.invoke("Question 2");
   * await llm.batch(["Q3", "Q4", "Q5"]);
   *
   * // Clean up when completely done
   * await llm.dispose();
   * ```
   */
  async dispose() {
    if (this._context) {
      await this._context.dispose();
      this._context = null;
    }

    if (this._model) {
      await this._model.dispose();
      this._model = null;
    }

    this._chatSession = null;
    this._initialized = false;

    if (this.verbose) {
      console.log('✓ Model resources disposed');
    }
  }

  /**
   * String representation for debugging
   *
   * @returns {string} Human-readable representation
   *
   * @example
   * ```javascript
   * const llm = new LlamaCppLLM({ modelPath: './llama-2-7b.gguf' });
   * console.log(llm.toString());
   * // "LlamaCppLLM(model=./llama-2-7b.gguf)"
   *
   * // Useful in pipelines
   * const pipeline = formatter.pipe(llm).pipe(parser);
   * console.log(pipeline.toString());
   * // "PromptFormatter() | LlamaCppLLM(model=./llama-2-7b.gguf) | OutputParser()"
   * ```
   */
  toString() {
    return `LlamaCppLLM(model=${this.modelPath})`;
  }
}

export default LlamaCppLLM;