// ═══════════════════════════════════════════
// HYBRID RAG RETRIEVAL LIBRARY
// Pure JS · No dependencies · Browser-ready
// ═══════════════════════════════════════════

// ─────────────────────────────────────────
// TextProcessor — normalization, sentence splitting, tokenization
// ─────────────────────────────────────────
class TextProcessor {

  /**
   * Turn free text into a list of lowercase word tokens.
   *
   * Punctuation is removed, the text is split on whitespace and tokens of
   * one or two characters are discarded.
   *
   * Letters, combining marks and digits of any script are kept (not just
   * ASCII `\w`), so "café" or "привет" survive intact. The input is first
   * composed to NFC so that precomposed and decomposed spellings of the same
   * word produce the same token. For pure-ASCII input the result is the same
   * as with the `\w` class.
   *
   * @param {string} text - Raw text; null/undefined yields an empty list.
   * @returns {string[]} Tokens in order of appearance (duplicates kept).
   */
  static normalize(text) {
    if (text == null) return [];
    return String(text)
      .normalize('NFC')
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
      .split(/\s+/)
      .filter(w => w.length > 2);
  }

  /**
   * Split text into trimmed, non-empty sentences.
   *
   * Line breaks (LF, CRLF or CR) are treated as spaces. A run of `.`, `!`
   * or `?` ends a sentence unless it is immediately followed by a digit,
   * which keeps decimals and version numbers such as "3.14" in one piece.
   * The terminating punctuation is not part of the returned sentences.
   *
   * @param {string} text - Raw text; null/undefined yields an empty list.
   * @returns {string[]} Sentences in document order.
   */
  static splitSentences(text) {
    if (text == null) return [];
    return String(text)
      .replace(/\r\n?|\n/g, ' ')
      .split(/[.!?]+(?!\d)/)
      .map(s => s.trim())
      .filter(s => s.length > 0);
  }

}

// ─────────────────────────────────────────
// Similarity — phonetic, levenshtein, n-gram
// ─────────────────────────────────────────
class Similarity {

  /**
   * Reduce a word to a rough phonetic key so that similar-sounding
   * spellings compare equal or nearly equal.
   *
   * The substitutions are applied in sequence and their order matters:
   * "ck" must collapse to "k" before the remaining "c" characters are
   * rewritten, otherwise "ck" would become "kk".
   *
   * @param {string} word
   * @returns {string} Lowercased phonetic key.
   */
  static phonetic(word) {
    return word
      .toLowerCase()
      .replace(/ph/g, 'f')
      .replace(/ee/g, 'i')
      .replace(/ea/g, 'i')
      .replace(/oo/g, 'u')
      .replace(/ou/g, 'u')
      .replace(/ck/g, 'k')
      .replace(/c/g, 'k')
      .replace(/z/g, 's')
      .replace(/x/g, 'ks');
  }

  /**
   * Levenshtein edit distance (insert / delete / substitute, each cost 1),
   * compared per UTF-16 code unit.
   *
   * Only the previous and current DP rows are kept, so memory is
   * proportional to `a.length` instead of `a.length * b.length`.
   *
   * @param {string} a
   * @param {string} b
   * @returns {number} Minimum number of single-character edits.
   */
  static levenshtein(a, b) {
    if (a === b) return 0;
    if (a.length === 0) return b.length;
    if (b.length === 0) return a.length;

    // Row 0: distance from the empty prefix of b to each prefix of a.
    let prev = Array.from({ length: a.length + 1 }, (_, j) => j);

    for (let i = 1; i <= b.length; i++) {
      const curr = [i];
      for (let j = 1; j <= a.length; j++) {
        const substitution = prev[j - 1] + (b[i - 1] === a[j - 1] ? 0 : 1);
        curr[j] = Math.min(substitution, curr[j - 1] + 1, prev[j] + 1);
      }
      prev = curr;
    }
    return prev[a.length];
  }

  /**
   * List every contiguous substring of length `n`, in order.
   *
   * @param {string} str
   * @param {number} [n=3] - Gram length.
   * @returns {string[]} Grams (duplicates kept); empty if `str` is shorter
   *   than `n`.
   */
  static ngrams(str, n = 3) {
    const grams = [];
    for (let i = 0; i <= str.length - n; i++) {
      grams.push(str.substring(i, i + n));
    }
    return grams;
  }

  /**
   * Trigram overlap between two strings, in the range [0, 1].
   *
   * The score is the size of the multiset intersection of the two gram
   * lists divided by the length of the longer list. Counting the
   * intersection as a multiset makes the measure symmetric: a gram that is
   * repeated in one string can only be matched as many times as it occurs
   * in the other.
   *
   * When both strings are too short to produce any gram, they are compared
   * for equality so that identical short strings score 1 rather than 0.
   *
   * @param {string} a
   * @param {string} b
   * @returns {number} Similarity between 0 (nothing shared) and 1.
   */
  static ngramSimilarity(a, b) {
    const g1 = this.ngrams(a);
    const g2 = this.ngrams(b);

    if (g1.length === 0 && g2.length === 0) {
      return a === b ? 1 : 0;
    }

    // Remaining unmatched occurrences of each gram of b.
    const available = new Map();
    for (const g of g2) {
      available.set(g, (available.get(g) || 0) + 1);
    }

    let matches = 0;
    for (const g of g1) {
      const left = available.get(g) || 0;
      if (left > 0) {
        matches++;
        available.set(g, left - 1);
      }
    }
    return matches / Math.max(g1.length, g2.length);
  }

}

// ─────────────────────────────────────────
// IndexBuilder — inverted index + document frequency
// ─────────────────────────────────────────
class IndexBuilder {

  /**
   * Create an empty index.
   *
   * `index` and `df` are dictionaries keyed by arbitrary tokens taken from
   * user text, so they are created without a prototype. With a plain `{}`
   * a token such as "constructor" would resolve to the inherited
   * `Object.prototype.constructor` and break the build.
   */
  constructor() {
    this.sentences = [];            // sentence strings, position = sentence id
    this.index = Object.create(null); // token -> ascending list of sentence ids
    this.df = Object.create(null);    // token -> number of sentences containing it
    this.docs = [];                 // sentence id -> token list of that sentence
  }

  /**
   * Rebuild the index from scratch for the given text, replacing any
   * previously indexed content.
   *
   * The text is split into sentences with `TextProcessor.splitSentences`;
   * each sentence is tokenized with `TextProcessor.normalize`. Every
   * distinct token of a sentence contributes one posting and one
   * document-frequency count.
   *
   * The new structures are assembled locally and assigned at the end, so
   * an exception while processing leaves the previous index untouched.
   *
   * @param {string} text - Source text to index.
   * @returns {void}
   */
  build(text) {
    const sentences = TextProcessor.splitSentences(text);
    const index = Object.create(null);
    const df = Object.create(null);
    const docs = [];

    sentences.forEach((sentence, id) => {
      const words = TextProcessor.normalize(sentence);
      docs[id] = words;

      // A token counts once per sentence regardless of repetitions.
      for (const w of new Set(words)) {
        if (index[w] === undefined) {
          index[w] = [];
          df[w] = 0;
        }
        index[w].push(id);
        df[w] += 1;
      }
    });

    this.sentences = sentences;
    this.index = index;
    this.df = df;
    this.docs = docs;
  }

}

// ─────────────────────────────────────────
// Ranker — simplified BM25-style (TF × IDF) scoring + hybrid fuzzy scoring
// ─────────────────────────────────────────
class Ranker {

  /**
   * Lexical relevance of one sentence for a query.
   *
   * For every query word that occurs in the sentence this adds
   * `tf * ln((N + 1) / df) * 2`, where `tf` is the number of occurrences in
   * the sentence and `df` the number of sentences containing the word.
   * There is no term-frequency saturation or length normalization. A query
   * word that appears several times in `queryWords` contributes each time.
   *
   * Only own, positive entries of `df` are used; anything else falls back
   * to a document frequency of 1. This keeps tokens such as "constructor"
   * from picking up inherited properties when `df` is a plain object.
   *
   * @param {string[]} queryWords - Normalized query tokens.
   * @param {string[]} words - Normalized tokens of the sentence.
   * @param {Object<string, number>} df - Document frequency per token.
   * @param {number} N - Total number of sentences in the corpus.
   * @returns {number} Score; 0 when no query word occurs in the sentence.
   */
  static bm25(queryWords, words, df, N) {
    // Count term frequencies once instead of rescanning per query word.
    const termFrequency = new Map();
    for (const w of words) {
      termFrequency.set(w, (termFrequency.get(w) || 0) + 1);
    }

    let score = 0;
    for (const q of queryWords) {
      const tf = termFrequency.get(q) || 0;
      if (tf === 0) continue;

      const known = Object.prototype.hasOwnProperty.call(df, q) && df[q] > 0;
      const idf = Math.log((N + 1) / (known ? df[q] : 1));
      score += tf * idf * 2;
    }
    return score;
  }

  /**
   * Combined lexical + fuzzy score of one sentence for a query.
   *
   * Starts from `bm25`, then for every (query word, sentence word) pair
   * compares their phonetic keys:
   *   - edit distance of at most 1 adds 0.7;
   *   - n-gram similarity above 0.5 adds the similarity itself.
   * Finally, a non-empty query whose tokens, joined by single spaces,
   * occur verbatim in the lowercased sentence adds 4.
   *
   * @param {string[]} queryWords - Normalized query tokens.
   * @param {string[]} sentenceWords - Normalized tokens of the sentence.
   * @param {string} sentence - Original sentence text.
   * @param {Object<string, number>} df - Document frequency per token.
   * @param {number} N - Total number of sentences in the corpus.
   * @returns {number} Score; an empty query always scores 0.
   */
  static hybrid(queryWords, sentenceWords, sentence, df, N) {
    let score = this.bm25(queryWords, sentenceWords, df, N);

    // Phonetic keys depend on one word only, so compute each exactly once.
    const queryKeys = queryWords.map(q => Similarity.phonetic(q));
    const sentenceKeys = sentenceWords.map(w => Similarity.phonetic(w));

    for (const pq of queryKeys) {
      for (const pw of sentenceKeys) {
        if (Similarity.levenshtein(pw, pq) <= 1) {
          score += 0.7;
        }

        const sim = Similarity.ngramSimilarity(pw, pq);
        if (sim > 0.5) {
          score += sim;
        }
      }
    }

    // Without the length guard the joined phrase is '' and every sentence
    // would match, since includes('') is always true.
    if (
      queryWords.length > 0 &&
      sentence.toLowerCase().includes(queryWords.join(' '))
    ) {
      score += 4;
    }

    return score;
  }

}

// ─────────────────────────────────────────
// Retriever — candidate search + ranking
// ─────────────────────────────────────────
class Retriever {

  /**
   * Capture the current structures of an index builder.
   *
   * The references are taken at construction time. The properties read
   * here are the ones `search` relies on: `index` (token -> sentence ids),
   * `docs` (sentence id -> tokens), `df` (token -> document frequency) and
   * `sentences` (sentence id -> text).
   *
   * @param {{index: Object, docs: string[][], df: Object, sentences: string[]}} indexBuilder
   */
  constructor(indexBuilder) {
    this.index = indexBuilder.index;
    this.docs = indexBuilder.docs;
    this.df = indexBuilder.df;
    this.sentences = indexBuilder.sentences;
  }

  /**
   * Find and rank sentences relevant to a query.
   *
   * Candidates are gathered in two passes: sentences containing a query
   * token exactly, then sentences containing any indexed token whose
   * phonetic key is within edit distance 1 of a query token's key. Each
   * candidate is scored with `Ranker.hybrid`; candidates with a score of 0
   * or less are dropped.
   *
   * @param {string} query - Free-text query.
   * @returns {{id: number, score: number, sentence: string}[]} Matches
   *   sorted by descending score; empty when nothing matches.
   */
  search(query) {
    const queryWords = TextProcessor.normalize(query);
    const candidates = new Set();

    // Own-property lookup: a query token such as "constructor" must not
    // resolve to something inherited from Object.prototype.
    const postingsOf = token =>
      Object.prototype.hasOwnProperty.call(this.index, token)
        ? this.index[token]
        : [];

    // Pass 1: exact token matches.
    for (const w of queryWords) {
      for (const id of postingsOf(w)) {
        candidates.add(id);
      }
    }

    // Pass 2: fuzzy matches on phonetic keys. The vocabulary keys are
    // computed once per search, and only when there is something to match.
    if (queryWords.length > 0) {
      const vocabulary = Object.keys(this.index).map(token => ({
        token,
        key: Similarity.phonetic(token),
      }));

      for (const q of queryWords) {
        const pq = Similarity.phonetic(q);
        for (const { token, key } of vocabulary) {
          if (Similarity.levenshtein(key, pq) <= 1) {
            for (const id of this.index[token]) {
              candidates.add(id);
            }
          }
        }
      }
    }

    const scored = [];
    for (const id of candidates) {
      const sentence = this.sentences[id];
      const score = Ranker.hybrid(
        queryWords,
        this.docs[id],
        sentence,
        this.df,
        this.sentences.length
      );
      if (score > 0) {
        scored.push({ id, score, sentence });
      }
    }

    scored.sort((a, b) => b.score - a.score);
    return scored;
  }

}

// ─────────────────────────────────────────
// ContextBuilder — sentence window extraction
// ─────────────────────────────────────────
class ContextBuilder {

  /**
   * Return the sentence at `id` together with up to `size` neighbours on
   * each side, joined into a single string with ". " between sentences.
   *
   * The window is clipped at both ends of the list, so a hit near the
   * start or end simply yields fewer neighbours.
   *
   * @param {string[]} sentences - All sentences, indexed by sentence id.
   * @param {number} id - Id of the central sentence.
   * @param {number} [size=1] - Number of neighbours to take on each side.
   * @returns {string} The joined window text.
   */
  static window(sentences, id, size = 1) {
    const firstIncluded = Math.max(0, id - size);
    const firstExcluded = Math.min(sentences.length, id + size + 1);
    const neighbourhood = sentences.slice(firstIncluded, firstExcluded);
    return neighbourhood.join('. ');
  }

}

// ─────────────────────────────────────────
// HybridRAG — main engine
// ─────────────────────────────────────────
class HybridRAG {

  /** Create an engine with an empty index. */
  constructor() {
    this.indexBuilder = new IndexBuilder();
    this.retriever = null;
    this.indexed = false;
    this.sourceCount = 0;
    this.sentenceCount = 0;
  }

  /**
   * Index `text`, replacing everything indexed before.
   *
   * Because earlier content is discarded, the source count is reset to 1
   * rather than incremented; use `addText` to accumulate sources.
   *
   * @param {string} text - Text to index.
   * @returns {{sentences: number, uniqueTerms: number}} Size of the new index.
   */
  index(text) {
    this.indexBuilder.build(text);
    this.retriever = new Retriever(this.indexBuilder);
    this.indexed = true;
    this.sourceCount = 1;
    this.sentenceCount = this.indexBuilder.sentences.length;
    return {
      sentences: this.sentenceCount,
      uniqueTerms: Object.keys(this.indexBuilder.index).length
    };
  }

  /**
   * Add `text` to what is already indexed and count it as one more source.
   *
   * The index is rebuilt from the previously indexed sentences joined with
   * ". " followed by the new text.
   *
   * @param {string} text - Additional text.
   * @returns {{sentences: number, uniqueTerms: number}} Size of the combined index.
   */
  addText(text) {
    const previousSources = this.sourceCount;
    const existing = this.indexBuilder.sentences.join('. ');
    const combined = existing ? `${existing}. ${text}` : text;

    const stats = this.index(combined);
    // index() resets the counter because it replaces content; here the
    // earlier sources are still part of the index, so restore and add one.
    this.sourceCount = previousSources + 1;
    return stats;
  }

  /**
   * Retrieve the best passages for a query and build an LLM prompt from them.
   *
   * The top `topK` ranked sentences are each expanded to a context window
   * of `windowSize` neighbouring sentences per side. Hits that produce
   * exactly the same window text as an earlier hit are skipped.
   *
   * @param {string} query - User question.
   * @param {number} [topK=5] - Maximum number of ranked sentences to use.
   * @param {number} [windowSize=1] - Neighbouring sentences per side.
   * @returns {{passages: Object[], prompt: string, ranked: Object[],
   *   totalCandidates: number, error?: string}} `error` is present only
   *   when nothing has been indexed yet.
   */
  query(query, topK = 5, windowSize = 1) {
    if (!this.indexed || !this.retriever) {
      return {
        passages: [],
        prompt: '',
        ranked: [],
        totalCandidates: 0,
        error: 'No text indexed yet.'
      };
    }

    const ranked = this.retriever.search(query);
    const top = ranked.slice(0, topK);
    const passages = [];
    const seen = new Set();

    for (const hit of top) {
      const ctx = ContextBuilder.window(
        this.indexBuilder.sentences,
        hit.id,
        windowSize
      );
      // Skip a hit whose window text is identical to one already emitted.
      if (seen.has(ctx)) continue;
      seen.add(ctx);
      passages.push({
        text: ctx,
        score: hit.score,
        sentenceId: hit.id,
        original: hit.sentence
      });
    }

    const prompt =
      'Use the following context to answer the question:\n\n' +
      passages.map((p, i) => `[${i + 1}] (score: ${p.score.toFixed(2)}) ${p.text}`).join('\n\n') +
      '\n\nQuestion: ' + query +
      '\nAnswer:';

    return {
      passages,
      prompt,
      ranked: top,
      totalCandidates: ranked.length
    };
  }

  /**
   * Snapshot of the engine state.
   *
   * @returns {{indexed: boolean, sentences: number, uniqueTerms: number, sources: number}}
   */
  getStats() {
    return {
      indexed: this.indexed,
      sentences: this.sentenceCount,
      uniqueTerms: Object.keys(this.indexBuilder.index).length,
      sources: this.sourceCount
    };
  }

  /** Drop all indexed content and reset every counter. */
  clear() {
    this.indexBuilder = new IndexBuilder();
    this.retriever = null;
    this.indexed = false;
    this.sourceCount = 0;
    this.sentenceCount = 0;
  }

}