File size: 2,377 Bytes
1dbc34b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/**
 * XML utility functions for escaping, unescaping, and extracting XML content.
 * These are pure functions with no dependencies for maximum reusability.
 */

/**
 * Escape the five XML special characters (`&`, `<`, `>`, `"`, `'`) using
 * the predefined XML entities so the result can be embedded safely in
 * element content or attribute values.
 *
 * @param str - The raw text to escape; `null` and `undefined` are accepted.
 * @returns The escaped text, or an empty string when `str` is `null` or
 *   `undefined`.
 */
export function escapeXml(str: string | undefined | null): string {
  if (str == null) {
    return '';
  }
  return (
    str
      // `&` must be handled first: every later replacement introduces new
      // ampersands, which must not be escaped a second time.
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;')
  );
}

/**
 * Decode the five predefined XML entities (`&apos;`, `&quot;`, `&gt;`,
 * `&lt;`, `&amp;`) back into their literal characters.
 *
 * @param str - Text that may contain XML entities.
 * @returns The text with every occurrence of those entities decoded.
 */
export function unescapeXml(str: string): string {
  // Applied in order. `&amp;` is deliberately last so that an input such as
  // `&amp;lt;` decodes to the literal text `&lt;` rather than to `<`.
  const entityTable: ReadonlyArray<readonly [string, string]> = [
    ['&apos;', "'"],
    ['&quot;', '"'],
    ['&gt;', '>'],
    ['&lt;', '<'],
    ['&amp;', '&'],
  ];

  let decoded = str;
  for (const [entity, literal] of entityTable) {
    decoded = decoded.split(entity).join(literal);
  }
  return decoded;
}

/**
 * Backslash-escape every RegExp metacharacter in `value` so the result can
 * be interpolated into a `RegExp` source and match the text literally.
 *
 * @param value - The literal text to make regex-safe.
 * @returns `value` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a
 *   backslash.
 */
function escapeRegExp(value: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (metaChar) => '\\' + metaChar);
}

/**
 * Return the raw inner content of the first `<tagName>…</tagName>` section.
 *
 * The tag name is matched case-insensitively and the content is captured
 * non-greedily, so the section ends at the first closing tag that follows.
 * The captured text is returned as-is: it is neither trimmed nor unescaped.
 *
 * Note: only bare tags without attributes are matched. Tags with
 * attributes (e.g., `<tag id="1">`) are not supported.
 *
 * @param xmlContent - The full XML content
 * @param tagName - The tag name to extract (e.g., 'implemented_features')
 * @returns The content between the tags, or null if not found
 */
export function extractXmlSection(xmlContent: string, tagName: string): string | null {
  // Escape the name so characters such as `.` in a tag are matched literally.
  const escapedName = escapeRegExp(tagName);
  const sectionPattern = new RegExp(`<${escapedName}>([\\s\\S]*?)<\\/${escapedName}>`, 'i');

  const found = sectionPattern.exec(xmlContent);
  if (found === null) {
    return null;
  }
  return found[1];
}

/**
 * Collect the content of every `<tagName>…</tagName>` element in document
 * order.
 *
 * The tag name is matched case-sensitively and each element's content is
 * captured non-greedily. Every captured value is trimmed and then has its
 * XML entities decoded.
 *
 * Note: only bare tags without attributes are matched. Tags with
 * attributes (e.g., `<tag id="1">`) are not supported.
 *
 * @param xmlContent - The XML content to search
 * @param tagName - The tag name to extract values from
 * @returns Array of extracted values (unescaped and trimmed); empty when no
 *   element matches
 */
export function extractXmlElements(xmlContent: string, tagName: string): string[] {
  // Escape the name so characters such as `.` in a tag are matched literally.
  const escapedName = escapeRegExp(tagName);
  const elementPattern = new RegExp(`<${escapedName}>([\\s\\S]*?)<\\/${escapedName}>`, 'g');

  return Array.from(xmlContent.matchAll(elementPattern), (hit) => unescapeXml(hit[1].trim()));
}