/**
 * An entity keyword detected in trace text, together with how often it was
 * seen and where.
 */
export interface RealEntityData {
  /** The matched keyword, lower-cased and trimmed. */
  text: string;
  /** Number of times the keyword was matched. */
  frequency: number;
  /** Entity category the keyword belongs to. */
  type: "person" | "system" | "concept" | "action" | "object";
  /** Excerpts of the source text surrounding the keyword. */
  context: string[];
  /** IDs of the traces containing this entity. */
  traces: string[];
}

/** Summary statistics describing the shape of a piece of trace text. */
export interface ContentInsightData {
  /** Names of the predefined phrase patterns found in the text (e.g. "user_requests"). */
  commonPatterns: string[];
  /** Number of `?` characters in the text. */
  questionCount: number;
  /** Number of sentences that begin with a command keyword such as "please" or "run". */
  commandCount: number;
  /** Mean sentence length in characters, rounded to the nearest integer. */
  averageResponseLength: number;
  /**
   * Repeated words in the text, each with a weight expressing its share of
   * the counted words as a rounded percentage.
   */
  contentThemes: { theme: string; weight: number }[];
}

/** Full result of analyzing the textual content of a single trace. */
export interface TraceContentAnalysis {
  /** Entity keywords detected in the trace text. */
  entities: RealEntityData[];
  /** Subject/object relations detected in the trace text, labelled by relation type. */
  relations: { from: string; to: string; type: string; traces: string[] }[];
  /** Sentence-level statistics and themes for the trace text. */
  insights: ContentInsightData;
  /**
   * Estimated reading time: word count divided by 200, rounded up, and at
   * least 1 for non-empty text (presumably minutes at 200 words per minute —
   * confirm with the consumer of this value).
   */
  readingTime: number;
  /** Text complexity score, capped at 100; 0 for empty text. */
  complexity: number;
  /** Number of whitespace-separated tokens in the text. */
  wordCount: number;
  /** Length of the combined text in characters. */
  characterCount: number;
  /** Estimated time to understand the trace without visualization. */
  timeWithoutAgentGraph: number;
  /** Percentage of time saved with AgentGraph, rounded to an integer. */
  timeSavedPercentage: number;
  /** How well structured the information is (0-100). */
  comprehensionScore: number;
}

// Entity detection patterns.
//
// Maps each entity type to a case-insensitive, whole-word (`\b...\b`) keyword
// regex. The regexes are global (`g`) and shared at module level, so they keep
// `lastIndex` state between `exec`/`test` calls; reset `lastIndex` before
// using either of those methods on them.
const ENTITY_PATTERNS = {
  person:
    /\b(user|customer|client|person|individual|member|admin|administrator|operator)\b/gi,
  system:
    /\b(system|api|database|service|server|application|platform|framework|engine|model|ai|bot|agent)\b/gi,
  action:
    /\b(process|analyze|generate|create|execute|run|handle|manage|respond|query|search|filter|transform|validate)\b/gi,
  concept:
    /\b(data|information|content|message|request|response|task|job|workflow|pipeline|session|context)\b/gi,
  object:
    /\b(file|document|record|item|entity|node|graph|trace|log|report|output|input|result)\b/gi,
};

// Enhanced relation detection patterns for agent systems.
//
// Every pattern has three capture groups: (1) the source word, (2) the
// connecting verb or phrase, (3) the target word. `type` is the label recorded
// for a relation found by that pattern. All regexes are global (`g`) and
// shared at module level, so `lastIndex` must be reset before each scan.
// The patterns are applied independently, so one phrase can produce relations
// from more than one entry (e.g. "agent processes request" fits both the
// "processes" and the "understands" pattern).
const RELATION_PATTERNS = [
  // Action-based relations
  {
    pattern: /(\w+)\s+(processes|handles|executes|runs|performs)\s+(\w+)/gi,
    type: "processes",
  },
  {
    pattern: /(\w+)\s+(generates|creates|produces|builds|constructs)\s+(\w+)/gi,
    type: "generates",
  },
  {
    pattern: /(\w+)\s+(analyzes|examines|evaluates|reviews|assesses)\s+(\w+)/gi,
    type: "analyzes",
  },
  {
    pattern: /(\w+)\s+(sends|transmits|delivers|provides|supplies)\s+(\w+)/gi,
    type: "sends",
  },
  {
    pattern: /(\w+)\s+(receives|gets|obtains|retrieves|fetches)\s+(\w+)/gi,
    type: "receives",
  },
  {
    pattern: /(\w+)\s+(transforms|converts|modifies|changes|updates)\s+(\w+)/gi,
    type: "transforms",
  },

  // Dependency relations
  {
    pattern: /(\w+)\s+(depends on|requires|needs|relies on)\s+(\w+)/gi,
    type: "depends_on",
  },
  {
    pattern: /(\w+)\s+(uses|utilizes|employs|leverages)\s+(\w+)/gi,
    type: "uses",
  },
  {
    pattern: /(\w+)\s+(calls|invokes|triggers|activates)\s+(\w+)/gi,
    type: "calls",
  },

  // Containment and hierarchy
  {
    pattern: /(\w+)\s+(contains|includes|encompasses|comprises)\s+(\w+)/gi,
    type: "contains",
  },
  {
    pattern: /(\w+)\s+(belongs to|is part of|is within|is in)\s+(\w+)/gi,
    type: "belongs_to",
  },
  {
    pattern: /(\w+)\s+(manages|controls|oversees|supervises)\s+(\w+)/gi,
    type: "manages",
  },

  // Communication patterns
  {
    pattern: /(\w+)\s+(responds to|replies to|answers)\s+(\w+)/gi,
    type: "responds_to",
  },
  {
    pattern: /(\w+)\s+(requests|asks for|queries|seeks)\s+(\w+)/gi,
    type: "requests",
  },
  {
    pattern: /(\w+)\s+(notifies|informs|alerts|tells)\s+(\w+)/gi,
    type: "notifies",
  },

  // Flow relations
  {
    pattern: /(\w+)\s+(flows to|goes to|moves to|passes to)\s+(\w+)/gi,
    type: "flows_to",
  },
  {
    pattern: /(\w+)\s+(comes from|originates from|starts from)\s+(\w+)/gi,
    type: "comes_from",
  },

  // Agent-specific patterns
  {
    pattern:
      /(user|customer)\s+(interacts with|talks to|communicates with)\s+(\w+)/gi,
    type: "interacts_with",
  },
  {
    pattern:
      /(agent|system|bot)\s+(assists|helps|supports)\s+(user|customer)/gi,
    type: "assists",
  },
  {
    pattern:
      /(agent|system)\s+(understands|interprets|processes)\s+(query|request|input)/gi,
    type: "understands",
  },

  // Preposition-based catch-all for broader coverage: links any two words
  // joined by a common preposition, so it matches far more text than the
  // verb patterns above.
  {
    pattern: /(\w+)\s+(to|into|from|with|via|through)\s+(\w+)/gi,
    type: "connects_to",
  },
];

// Common English function words (plus auxiliary/modal verbs) that are never
// treated as entities, relation endpoints, or themes. All entries are
// lower-case; callers lower-case their input before looking it up.
// prettier-ignore
const STOPWORDS = new Set([
  // Most frequent English words
  "the", "be", "to", "of", "and", "a", "in", "that", "have", "i",
  "it", "for", "not", "on", "with", "he", "as", "you", "do", "at",
  "this", "but", "his", "by", "from", "they", "we", "say", "her", "she",
  "or", "an", "will", "my", "one", "all", "would", "there", "their", "what",
  "so", "up", "out", "if", "about", "who", "get", "which", "go", "me",
  "when", "make", "can", "like", "time", "no", "just", "him", "know", "take",
  "people", "into", "year", "your", "good", "some", "could", "them", "see", "other",
  "than", "then", "now", "look", "only", "come", "its", "over", "think", "also",
  "back", "after", "use", "two", "how", "our", "work", "first", "well", "way",
  "even", "new", "want", "because", "any", "these", "give", "day", "most", "us",
  // Forms of "be"/"have"/"do" and modal verbs not already listed above
  "is", "was", "are", "been", "has", "had", "were", "said", "each", "am",
  "being", "having", "does", "did", "doing", "should", "may", "might", "must", "shall",
]);

/**
 * Scans `text` for the keywords listed in `ENTITY_PATTERNS` and returns one
 * record per distinct (keyword, entity type) pair.
 *
 * Matching is case-insensitive; the keyword is stored lower-cased. Each record
 * carries the number of matches and a single context excerpt taken around the
 * first match of that keyword.
 *
 * @param text - Text to scan. Empty or non-string input yields an empty array.
 * @param traceId - ID of the trace the text belongs to; recorded on every entity.
 * @returns The detected entities, in order of first detection per entity type.
 */
export function extractEntitiesFromText(
  text: string,
  traceId: string
): RealEntityData[] {
  if (!text || typeof text !== "string") return [];

  // Characters of surrounding text kept on each side of the first match.
  const contextRadius = 50;
  const entities = new Map<string, RealEntityData>();

  for (const [type, pattern] of Object.entries(ENTITY_PATTERNS)) {
    // The patterns are shared global regexes: rewind before scanning.
    pattern.lastIndex = 0;

    let match: RegExpExecArray | null;
    while ((match = pattern.exec(text)) !== null) {
      const matched = match[0];
      const cleanMatch = matched.toLowerCase().trim();
      if (cleanMatch.length <= 1 || STOPWORDS.has(cleanMatch)) continue;

      const key = `${cleanMatch}-${type}`;
      const existing = entities.get(key);
      if (existing) {
        existing.frequency += 1;
        continue;
      }

      // Use the position reported by the regex rather than searching for the
      // matched string again: a plain substring search could land inside a
      // longer word (e.g. the "ai" in "main") and produce the wrong excerpt.
      const start = match.index;
      entities.set(key, {
        text: cleanMatch,
        frequency: 1,
        type: type as RealEntityData["type"],
        context: [
          text.substring(
            Math.max(0, start - contextRadius),
            start + matched.length + contextRadius
          ),
        ],
        traces: [traceId],
      });
    }
  }

  return Array.from(entities.values());
}

/**
 * Analyzes the textual content of a trace: its title, description, and the
 * name and summary of each attached knowledge graph.
 *
 * Missing or malformed input is tolerated: a `null`/`undefined` trace, a
 * `knowledge_graphs` value that is not an array, and null entries inside that
 * array are all treated as contributing no text. When no text is available
 * at all, an analysis with empty collections and zeroed metrics is returned.
 *
 * @param trace - Trace-like object; `id`/`trace_id`, `title`, `description`
 *   and `knowledge_graphs[].system_name` / `.system_summary` are read.
 * @returns Entities, relations, insights and derived metrics for the text.
 */
export function analyzeTraceContent(trace: any): TraceContentAnalysis {
  const traceId = trace?.id || trace?.trace_id || "unknown";

  // Only iterate knowledge graphs when the field really is an array, so a
  // malformed trace cannot throw here.
  const knowledgeGraphs: any[] = Array.isArray(trace?.knowledge_graphs)
    ? trace.knowledge_graphs
    : [];

  const content = [
    trace?.title || "",
    trace?.description || "",
    ...knowledgeGraphs.flatMap((kg) => [
      kg?.system_name || "",
      kg?.system_summary || "",
    ]),
  ]
    .filter(Boolean)
    .join(" ");

  if (!content) {
    return {
      entities: [],
      relations: [],
      insights: {
        commonPatterns: [],
        questionCount: 0,
        commandCount: 0,
        averageResponseLength: 0,
        contentThemes: [],
      },
      readingTime: 0,
      complexity: 0,
      wordCount: 0,
      characterCount: 0,
      timeWithoutAgentGraph: 0,
      timeSavedPercentage: 0,
      comprehensionScore: 0,
    };
  }

  const entities = extractEntitiesFromText(content, traceId);
  const relations = extractActualRelations(content, traceId);

  const wordCount = content
    .split(/\s+/)
    .filter((word) => word.length > 0).length;
  const characterCount = content.length;

  // Guarded so that empty word counts never produce NaN.
  const readingTime =
    wordCount > 0 ? Math.max(1, Math.ceil(wordCount / 200)) : 0;
  const complexity = calculateTextComplexity(content);

  // AgentGraph value metrics: reading without the visualization is assumed
  // to take at least three times as long.
  const timeWithoutAgentGraph = Math.max(
    readingTime * 3,
    Math.ceil(wordCount / 100)
  );
  const timeSavedPercentage =
    readingTime > 0
      ? Math.round(
          ((timeWithoutAgentGraph - readingTime) / timeWithoutAgentGraph) * 100
        )
      : 0;

  // Comprehension score: lower complexity and a higher density of entities
  // and relations (per 1000 words) both raise it; clamped to 0-100.
  const entityDensity =
    wordCount > 0 ? (entities.length / wordCount) * 1000 : 0;
  const relationDensity =
    wordCount > 0 ? (relations.length / wordCount) * 1000 : 0;
  const structureScore = Math.min(100, (entityDensity + relationDensity) * 10);
  const comprehensionScore = Math.min(
    100,
    Math.max(0, 100 - complexity + structureScore)
  );

  return {
    entities,
    relations,
    insights: analyzeContentInsights(content),
    readingTime,
    complexity,
    wordCount,
    characterCount,
    timeWithoutAgentGraph,
    timeSavedPercentage,
    comprehensionScore,
  };
}

/**
 * Aggregates the entities of several traces into one ranked list.
 *
 * Entities with the same text and type are merged: their frequencies are
 * summed, their context excerpts concatenated, and their trace IDs unioned.
 *
 * @param traces - Traces to analyze with `analyzeTraceContent`.
 * @returns At most 50 merged entities, most frequent first.
 */
export function buildEntityFrequencyMap(traces: any[]): RealEntityData[] {
  const merged = new Map<string, RealEntityData>();
  const allEntities = traces.flatMap(
    (trace) => analyzeTraceContent(trace).entities
  );

  for (const entity of allEntities) {
    const key = `${entity.text}-${entity.type}`;
    const known = merged.get(key);

    if (known === undefined) {
      // First sighting: keep a shallow copy as the accumulator.
      merged.set(key, { ...entity });
      continue;
    }

    known.frequency += entity.frequency;
    known.context.push(...entity.context);
    for (const id of entity.traces) {
      if (!known.traces.includes(id)) known.traces.push(id);
    }
  }

  const ranked = Array.from(merged.values());
  ranked.sort((left, right) => right.frequency - left.frequency);
  return ranked.slice(0, 50);
}

/**
 * Finds "source <verb/phrase> target" relations in `text` using every entry
 * of `RELATION_PATTERNS`.
 *
 * Both endpoints are lower-cased and trimmed. A match is discarded when an
 * endpoint is missing, when both endpoints are the same word, or when either
 * endpoint is a stopword. Results are not de-duplicated.
 *
 * @param text - Text to scan. Empty or non-string input yields an empty array.
 * @param traceId - ID of the trace the text belongs to; recorded on every relation.
 * @returns Relations in pattern order, then in order of appearance in the text.
 */
export function extractActualRelations(
  text: string,
  traceId: string
): { from: string; to: string; type: string; traces: string[] }[] {
  type Relation = { from: string; to: string; type: string; traces: string[] };
  const found: Relation[] = [];

  if (typeof text !== "string" || text.length === 0) return found;

  for (const { pattern, type } of RELATION_PATTERNS) {
    // Shared global regex: always start scanning from the beginning.
    pattern.lastIndex = 0;

    for (
      let hit = pattern.exec(text);
      hit !== null;
      hit = pattern.exec(text)
    ) {
      const source = hit[1]?.toLowerCase().trim();
      const target = hit[3]?.toLowerCase().trim();

      if (!source || !target) continue;
      if (source === target) continue;
      if (STOPWORDS.has(source) || STOPWORDS.has(target)) continue;

      found.push({ from: source, to: target, type, traces: [traceId] });
    }
  }

  return found;
}

/**
 * Computes sentence-level statistics for `text`.
 *
 * Sentences are the non-blank pieces obtained by splitting on runs of `.`,
 * `!` and `?`. A sentence counts as a command when its first word is one of
 * a fixed set of imperative keywords ("please", "run", "execute", ...).
 *
 * @param text - Text to analyze; empty input yields zeroed statistics.
 * @returns Question/command counts, mean sentence length, detected phrase
 *   patterns and content themes.
 */
function analyzeContentInsights(text: string): ContentInsightData {
  if (!text) {
    return {
      commonPatterns: [],
      questionCount: 0,
      commandCount: 0,
      averageResponseLength: 0,
      contentThemes: [],
    };
  }

  // The trailing \b makes the keyword match as a whole word only, so
  // sentences starting with "Running", "Processing" or "Started" are not
  // miscounted as commands.
  const commandStart =
    /^(?:please|run|execute|start|stop|create|delete|update|process)\b/i;

  const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 0);
  const questionCount = (text.match(/\?/g) || []).length;
  const commandCount = sentences.filter((s) =>
    commandStart.test(s.trim())
  ).length;

  // Mean sentence length in characters (untrimmed pieces), rounded.
  const averageResponseLength =
    sentences.length > 0
      ? Math.round(
          sentences.reduce((sum, s) => sum + s.length, 0) / sentences.length
        )
      : 0;

  return {
    commonPatterns: findCommonPatterns(text),
    questionCount,
    commandCount,
    averageResponseLength,
    contentThemes: extractThemes(text),
  };
}

/**
 * Extracts the dominant repeated words ("themes") of `text`.
 *
 * The text is lower-cased, punctuation is replaced by spaces, and words of
 * four or more characters that are not stopwords are counted. Words seen
 * only once are ignored.
 *
 * @param text - Text to analyze; empty input yields an empty array.
 * @returns Up to 10 themes, heaviest first. `weight` is the word's share of
 *   all counted words as a rounded percentage. Themes of equal weight keep
 *   the order in which they first appear in the text.
 */
function extractThemes(text: string): { theme: string; weight: number }[] {
  if (!text) return [];

  const words = text
    .toLowerCase()
    .replace(/[^\w\s]/g, " ")
    .split(/\s+/)
    .filter((word) => word.length > 3 && !STOPWORDS.has(word));

  const totalWords = words.length;

  // A Map is used instead of a plain object so that words such as
  // "constructor" or "tostring" cannot collide with inherited
  // Object.prototype members and corrupt their own counts.
  const frequency = new Map<string, number>();
  for (const word of words) {
    frequency.set(word, (frequency.get(word) ?? 0) + 1);
  }

  return Array.from(frequency.entries())
    .filter(([, count]) => count > 1)
    .map(([theme, count]) => ({
      theme,
      // totalWords is at least 2 here because this word alone occurs twice.
      weight: Math.round((count / totalWords) * 100),
    }))
    .sort((a, b) => b.weight - a.weight)
    .slice(0, 10);
}

/**
 * Reports which of a fixed set of phrase patterns occur in `text`.
 *
 * @param text - Text to inspect; empty input yields an empty array.
 * @returns The names of the matching patterns, in declaration order.
 */
function findCommonPatterns(text: string): string[] {
  if (!text) return [];

  // The regexes are created on every call, so their global-flag state never
  // leaks from one invocation to the next.
  const detectors: [string, RegExp][] = [
    ["user_requests", /user\s+(asks|requests|wants|needs)/gi],
    ["system_actions", /system\s+(processes|analyzes|generates|creates)/gi],
    ["error_handling", /error\s+(occurred|found|detected)/gi],
    ["data_processing", /data\s+(processed|analyzed|transformed)/gi],
  ];

  const detected: string[] = [];
  for (const [label, matcher] of detectors) {
    if (matcher.test(text)) {
      detected.push(label);
    }
  }
  return detected;
}

/**
 * Scores how complex `text` is to read, from 0 to 100.
 *
 * The score is half the average number of words per sentence plus twice the
 * average number of non-whitespace characters per word, rounded and capped at
 * 100. Sentences are the non-blank pieces between runs of `.`, `!` and `?`;
 * words are whitespace-separated tokens.
 *
 * @param text - Text to score.
 * @returns The complexity score, or 0 when the text has no sentences or words.
 */
function calculateTextComplexity(text: string): number {
  if (!text) return 0;

  const sentenceCount = text
    .split(/[.!?]+/)
    .filter((piece) => piece.trim().length > 0).length;
  const wordTotal = text.split(/\s+/).filter((token) => token.length > 0).length;

  // With both counts positive, the divisions below are always finite, so no
  // NaN handling is needed past this point.
  if (sentenceCount === 0 || wordTotal === 0) return 0;

  const wordsPerSentence = wordTotal / sentenceCount;
  const charsPerWord = text.replace(/\s+/g, "").length / wordTotal;

  const rawScore = Math.round(wordsPerSentence * 0.5 + charsPerWord * 2);
  return Math.min(100, rawScore);
}