/**
 * Many translations contain minor corruptions that cause rendering to
 * fail, which forces a fallback to the English content. Many of these
 * corruptions are easy to correct automatically.
 *
 * This function is a temporary solution that corrects such corruptions.
 * It only targets the easy "low-hanging fruit" that we can safely fix.
 */

/**
 * Optional information about the piece of content handed to
 * `correctTranslatedContentStrings`. Every field may be omitted; fixes that
 * depend on a missing field are simply not applied.
 */
interface CorrectionContext {
  /**
   * Path of the content file. Compared against page paths such as
   * `authentication/.../about-commit-signature-verification.md` to enable
   * page-specific fixes.
   */
  relativePath?: string
  /**
   * Dotted identifier of the content, compared against values such as
   * `reusables.copilot.differences-cfi-cfb-table`.
   */
  dottedPath?: string
  /** Language code of the translation, compared against values such as `ru`, `ja`, `zh`, `ko`, `es` and `pt`. */
  code?: string
  /** Any additional properties are accepted and left untouched. */
  [extra: string]: any
}

/** An ordered pair of literal (non-regex) strings: `[search, replacement]`. */
type LiteralReplacement = readonly [string, string]

// A lot of translations have corruptions around the AUTOTITLE links.
// We've requested that these are corrected back but as a temporary
// solution we'll manually recover now.
// See internal issue #2762
// In late 2023, search in the translations repos if these things are
// still happening and if not, these entries can be removed.
const AUTOTITLE_REPLACEMENTS: readonly LiteralReplacement[] = [
  ['[AUTOTITLE"을 참조하세요]', '[AUTOTITLE]'],
  ['[AUTOTITLE"을]', '[AUTOTITLE]'],
  ['["AUTOTITLE]', '"[AUTOTITLE]'],
  ['[AUTOTITLE"을 참조하세요.](', '[AUTOTITLE]('],
  ['[ AUTOTITLE](', '[AUTOTITLE]('],
  ['[ "AUTOTITLE](', '[AUTOTITLE]('],
  ['[«AUTOTITLE»](', '[AUTOTITLE]('],
]

// We've seen a lot of these across different languages.
const DOUBLE_BRACE_TAG_REPLACEMENTS: readonly LiteralReplacement[] = [
  ['{{% octicon', '{% octicon'],
  ['{{%octicon', '{% octicon'],
  ['{{% endif %}', '{% endif %}'],
  ['{{%endif %}', '{% endif %}'],
]

// For a short while we injected `replacedomain` into code snippets
// to activate the Domain Edit functionality. That was in `main` for a
// while and was later removed in English. But during that window of
// time, some translations picked it up. Let's remove it. For now.
// The day we re-instate editable domain, delete these entries.
const REPLACEDOMAIN_REPLACEMENTS: readonly LiteralReplacement[] = [
  ['```text replacedomain copy', '```text copy'],
  ['```shell replacedomain', '```shell'],
]

// Low-hanging fruit for Liquid tags whose keywords were translated into
// Russian. Order matters: the bare `{% variables.` entry must run after the
// entries that already produce `{% data variables`.
const RUSSIAN_TAG_REPLACEMENTS: readonly LiteralReplacement[] = [
  ['{% данных variables', '{% data variables'],
  ['{% данными variables', '{% data variables'],
  ['{% данных организации variables', '{% data variables'],
  ['{% данным variables.', '{% data variables.'],
  ['{% данные variables.', '{% data variables.'],
  ['{% данных reusables', '{% data reusables'],
  ['{% данными reusables', '{% data reusables'],
  ['{% variables.', '{% data variables.'],
  ['{% необработанного %}', '{% raw %}'],
  ['{%- ifversion fpt или ghec %}', '{%- ifversion fpt or ghec %}'],
  ['{% ifversion fpt или ghec %}', '{% ifversion fpt or ghec %}'],
  ['{% endif _%}', '{% endif %}'],
  ['{% конечным %}', '{% endif %}'],
  ['{% переменных данных.', '{% data variables.'],
  ['{% повторно используемых данных.', '{% data reusables.'],
  ['{% примечание %}', '{% note %}'],
  ['{% конечных головщиков %}', '{% endrowheaders %}'],
  ['{% данных для повторного использования.', '{% data reusables.'],
  ['{% еще %}', '{% else %}'],
  ['{% необработанные %}', '{% raw %}'],
  ['{% подсказки %}', '{% tip %}'],
]

// For the rather custom Russian translation of
// the content/get-started/learning-about-github/github-glossary.md page.
// These string replacements speak for themselves.
const RUSSIAN_GLOSSARY_REPLACEMENTS: readonly LiteralReplacement[] = [
  ['{% для глоссария в глоссариях %}', '{% for glossary in glossaries %}'],
  ['{{ глоссарий.term }}', '{{ glossary.term }}'],
  ['{{ глоссарий.description }}', '{{ glossary.description }}'],
]

// Low-hanging fruit for the data tag in Japanese.
const JAPANESE_TAG_REPLACEMENTS: readonly LiteralReplacement[] = [
  ['{% データ variables', '{% data variables'],
  ['{% データvariables', '{% data variables'],
]

// Low-hanging fruit for the data tag in Korean, followed by fixes for the
// rather custom Korean translation of github-glossary.md. The glossary
// entries try to salvage based on what's in
// docs-internal.ko-kr/content/get-started/learning-about-github/github-glossary.md
// as of September 2023.
const KOREAN_REPLACEMENTS: readonly LiteralReplacement[] = [
  ['{% 데이터 variables', '{% data variables'],
  ['{% 데이터 reusables.', '{% data reusables.'],
  ['용어집 %}의 용어집에 대한 {%', '{% for glossary in glossaries %}'],
  ['{{ 용어집.term }}', '{{ glossary.term }}'],
  ['{{ 용어집.description }}', '{{ glossary.description }}'],
]

// These are common mistakes made by translations that are specific.
// They're prevalent in all translations so that's why they're not
// per-language. It's important though that these are applied after the
// per-language fixes. For example `{{% данных variables...`
const COMMON_TAG_REPLACEMENTS: readonly LiteralReplacement[] = [
  ['{{% data variables.', '{% data variables.'],
  ['{%%data variables.', '{% data variables.'],
  ['{{% data reusables.', '{% data reusables.'],
  ['{%%data reusables.', '{% data reusables.'],
  ['{{% ifversion ', '{% ifversion '],
]

// We *used* to mention this key within an English sentence. But that
// whole sentence is removed (from the English) and thus we need to remove
// the same sentence from the translations as well.
// Ideally, the translators immediately notice the change but we can't
// guarantee that turnaround time. So we string replace it with an
// empty string.
// NOTE! By late 2024 all translations *should* have caught up with
// English (which removed the sentence). Then we can delete all of this.
// See internal issue docs-content#13361
const COMMIT_SIGNATURE_PAGE =
  'authentication/managing-commit-signature-verification/about-commit-signature-verification.md'
const RETIRED_KEY_FINGERPRINT = '5DE3 E050 9C47 EA3C F04A 42D3 4AEE 18F8 3AFD EB23'
const RETIRED_KEY_SENTENCES: readonly string[] = [
  // ru
  `Полный отпечаток ключа\u00A0\u2014 \`${RETIRED_KEY_FINGERPRINT}\`.`,
  // ko
  `키의 전체 지문은 \`${RETIRED_KEY_FINGERPRINT}\`입니다.`,
  // es
  `La huella digital completa de la clave es \`${RETIRED_KEY_FINGERPRINT}\`.`,
  // zh
  `密钥的完整指纹是 \`${RETIRED_KEY_FINGERPRINT}\`。`,
  // pt
  `A impressão digital completa da chave é \`${RETIRED_KEY_FINGERPRINT}\`.`,
  // ja
  `キーの完全な指紋は、\`${RETIRED_KEY_FINGERPRINT}\` です。`,
  // fr
  `L\u2019empreinte digitale complète de la clé est \`${RETIRED_KEY_FINGERPRINT}\`.`,
  // de
  `Der vollständige Fingerabdruck des Schlüssels ist \`${RETIRED_KEY_FINGERPRINT}\`.`,
]

/**
 * Replaces every occurrence of each literal search string with its
 * replacement. Pairs are applied one after another, in array order, so a
 * later pair operates on the output of the earlier ones.
 */
function applyLiteralReplacements(
  text: string,
  replacements: readonly LiteralReplacement[],
): string {
  let result = text
  for (const [search, replacement] of replacements) {
    result = result.replaceAll(search, replacement)
  }
  return result
}

/** Applies the Russian-only fixes: translated Liquid keywords and broken UI YAML strings. */
function correctRussianContent(content: string): string {
  let fixed = applyLiteralReplacements(content, RUSSIAN_TAG_REPLACEMENTS)

  // Fix YAML quote issues in UI files. Specifically the disclaimer href attribute
  // href="...}> -> href="...">
  fixed = fixed.replace(/href="([^"]*)}>/g, 'href="$1">')

  // Fix double quotes in Russian YAML files that cause parsing errors
  // ""https:// -> "https://
  fixed = fixed.replace(/href=""https:\/\//g, 'href="https://')

  // Fix empty HTML tags that cause YAML parsing issues
  fixed = fixed.replaceAll('<b></b>', '')
  fixed = fixed.replaceAll('<u></u>', '')

  // Fix specific Russian UI YAML issues causing 502 errors.
  // Remove empty bold tags from early_access notice. The global removal
  // above is a single pass, so an empty pair can still be left behind when
  // tags were nested (e.g. `<b><b></b></b>`).
  fixed = fixed.replace(/early_access:\s*"([^"]*)<b><\/b>([^"]*)"/, 'early_access: "$1$2"')

  // Remove empty underline tags from privacy disclaimer
  fixed = fixed.replace(/(privacy_disclaimer:[^<]*)<u><\/u>/g, '$1')

  return applyLiteralReplacements(fixed, RUSSIAN_GLOSSARY_REPLACEMENTS)
}

/** Applies the Japanese-only fixes: translated data tags, a broken UI string and nested links. */
function correctJapaneseContent(content: string): string {
  let fixed = applyLiteralReplacements(content, JAPANESE_TAG_REPLACEMENTS)

  // Fix specific issue likely causing 502 errors
  // Remove trailing quote from the problematic translation
  fixed = fixed.replace(
    /asked_too_many_times:\s*申し訳ありません。短い時間に質問が多すぎます。\s*しばらく待ってからもう一度やり直してください。"\s*$/gm,
    'asked_too_many_times: 申し訳ありません。短い時間に質問が多すぎます。 しばらく待ってからもう一度やり直してください。',
  )

  // Internal issue #4160
  fixed = fixed.replaceAll(
    '- % data variables.product.prodname_copilot_enterprise %}',
    '- {% data variables.copilot.copilot_enterprise %}',
  )

  // This might not be exclusive to Japanese but put here because, at
  // the time of writing, it only happens on the Japanese translations.
  // According to the Microsoft translation guidelines, they're not
  // supposed to translate words that will be seen in the UI, but
  // instead mention them like this:
  //
  //    [Save changes](THE TRANSLATION OF "Save changes" IN JAPANESE)
  //
  // The problem is when these are wrapped in a deliberate Markdown link.
  // For example:
  //
  //    [[Save changes](THE TRANSLATION OF "Save changes" IN JAPANESE)](#some-section)
  //
  // A real observed example is:
  //
  //    [[Allow deletions](削除を許可)](#allow-deletions)
  //
  // Here, because "削除を許可" contains no spaces, the Markdown parser
  // thinks "削除を許可" is the URL! But in actuality,
  // `[Allow deletions](削除を許可)` is the text and `#allow-deletions`
  // is the URL.
  // This problem does not exhibit if the text "削除を許可" were to contain
  // a space character. But we can't assume that we can just add a space.
  // For example "削除 を許可" would be incorrect. And where do you put the
  // space? Between which characters?
  // Instead, we can inject a "hair space" whitespace character between
  // the `]` and the `(`. Then, the Markdown processor does not get confused
  // and the link is rendered correctly.
  // The `\u200A` is the "hair space" character. Technically whitespace
  // but not wide enough to visually appear as a space.
  return fixed.replace(/\[(\[.*?\])(\(\S+\)\]\()/g, '[$1\u200A$2')
}

/**
 * Restores linebreaks after Liquid tags when the English content shows the
 * same tag followed by a newline.
 */
function restoreLiquidLinebreaks(content: string, englishContent: string): string {
  // A lot of Liquid tags lose their linebreak after the `}`, resulting in
  // formatting problems, especially around Markdown tables.
  // This compares each Liquid statement, in the translation, and tests if
  // it appears like that but with a newline in the English.
  // English example:
  //
  //    {%- ifversion ghes %}
  //    | Thing | ✔️ |
  //    {%- endif %}
  //
  // Translation example:
  //
  //    {%- ifversion ghes %} | Thing | ✔️ | {%- endif %}
  //
  // There exists the risk that different Liquid statements get compared
  // to different Liquid statements in the English, but the risk is worth
  // taking because even if this accidentally introduces a newline, it's
  // unlikely to cause a problem. At worst a sentence displays in its
  // own paragraph.
  const afterTags = content.replace(/\{%(.+?)%\} /g, (match) => {
    if (match.lastIndexOf('{%') > 0) {
      // For example:
      //
      //    `{% bla bla %}, and {% foo bar %} `
      //
      // Our regex is not greedy, but technically, if you look closely
      // you'll see this is the first match that starts with `{%` and
      // ends with `%} `. Let's skip these.
      return match
    }

    // Swap the trailing space for a newline, but only if English has the
    // newline form and never the space form.
    const withLinebreak = `${match.slice(0, -1)}\n`
    const englishUsesLinebreak =
      englishContent.includes(withLinebreak) && !englishContent.includes(match)
    return englishUsesLinebreak ? withLinebreak : match
  })

  // The above corrections depend on looking for `{% foo %} ` and replacing
  // it with `{% foo %}\n`. ...if `{% foo %}\n` was in the English
  // content and `{% foo %} ` was *not*.
  // However we see a lot of cases of this:
  //
  //    ... {% endif %} | First Column ...
  //
  // Which needs to become this:
  //
  //    ... {% endif %}
  //    | First Column ...
  //
  // And since `{% endif %}` is such a common Liquid tag we can't rely
  // on looking for it with `{% endif %}\n` in the English content.
  const potentiallyBetter = '{% endif %}\n| '
  return afterTags.replace(/\{% endif %\} \| /g, (match) =>
    englishContent.includes(potentiallyBetter) ? potentiallyBetter : match,
  )
}

/**
 * Puts the Markdown table separator row of `scopes-for-oauth-apps.md` back on
 * its own line. `code` is the language code of the translation.
 */
function repairOAuthScopesTable(content: string, code: string | undefined): string {
  // This is a bit of a hack, but it works.
  // It looks for patterns like this:
  //
  //     Some words --------|-------|{
  //
  // And from that it tries to convert it to:
  //
  //     Some words
  //     --------|-------|{
  //
  let fixed = content
  if (code === 'pt') {
    // As of Aug 2023, the Portuguese translation seems to have lost the
    // `|` characters in their Markdown table syntax.
    // NOTE(review): the `{%` consumed by this pattern is not re-emitted by
    // the replacement — confirm that is intended.
    fixed = fixed.replace(/(\w)(\s-+\s-+\s){%/g, (_whole, start, rest) => {
      return `${start}\n${rest.replace(/\s/g, '|')}`
    })
  }
  // No `g` flag: only the first separator row is moved.
  return fixed.replace(/(\S\s*)(--+\|--+\|{)/, (_whole, start, rest) => {
    return `${start}\n${rest}`
  })
}

/**
 * Removes the translated sentence that mentions the retired key fingerprint
 * and redacts every remaining occurrence of the fingerprint itself.
 */
function removeRetiredKeyFingerprint(content: string): string {
  let fixed = content
  // Only the first listed translation found in the content is removed, but
  // every occurrence of it goes (a plain `replace` with a string pattern
  // would stop after the first one).
  const sentence = RETIRED_KEY_SENTENCES.find((candidate) => fixed.includes(candidate))
  if (sentence !== undefined) {
    fixed = fixed.replaceAll(sentence, '')
  }
  // Whatever mentions are left (e.g. a reworded sentence) must not leak the
  // fingerprint, so redact all of them rather than only the first.
  return fixed.replaceAll(RETIRED_KEY_FINGERPRINT, '[redacted in translation]')
}

/** Logs every line that still contains a `{{%` sequence so it can be investigated. */
function logRemainingDoubleBraceTags(content: string, context: CorrectionContext): void {
  if (!content.includes('{{%')) return
  content.split('\n').forEach((line, index) => {
    if (line.includes('{{%') && !line.includes('{{{% endraw')) {
      console.log(context.code, 'context.relativePath', context.relativePath)
      console.log(index, line)
    }
  })
}

/**
 * Corrects well-known, easy-to-fix corruptions in a translated content string
 * so that it has a better chance of rendering.
 *
 * The corrections are applied in a fixed order: universal fixes, fixes
 * specific to `context.code`, Markdown table/linebreak repairs guided by the
 * English content, and finally page-specific fixes selected by
 * `context.dottedPath` / `context.relativePath`.
 *
 * @param content - The translated content to correct.
 * @param englishContent - The English equivalent of `content`. It is only
 *   searched, never modified; it decides whether a missing linebreak should be
 *   restored.
 * @param context - Optional language code and paths that enable the
 *   language-specific and page-specific fixes. Defaults to no context.
 * @returns The corrected content. Lines that still contain `{{%` afterwards
 *   are logged to the console.
 */
export function correctTranslatedContentStrings(
  content: string,
  englishContent: string,
  context: CorrectionContext = {},
): string {
  let corrected = applyLiteralReplacements(content, AUTOTITLE_REPLACEMENTS)
  corrected = applyLiteralReplacements(corrected, DOUBLE_BRACE_TAG_REPLACEMENTS)

  if (corrected.includes('replacedomain')) {
    corrected = applyLiteralReplacements(corrected, REPLACEDOMAIN_REPLACEMENTS)
  }

  if (context.code === 'ru') {
    corrected = correctRussianContent(corrected)
  }

  if (context.code === 'ja') {
    corrected = correctJapaneseContent(corrected)
  }

  if (context.code === 'zh') {
    // Low-hanging fruit for the data tag
    corrected = corrected.replaceAll('{% 数据variables', '{% data variables')
  }

  if (context.code === 'ko') {
    corrected = applyLiteralReplacements(corrected, KOREAN_REPLACEMENTS)
  }

  if (context.code === 'es') {
    // Seen these a few times in the Spanish translations.
    corrected = corrected.replaceAll('{% vulnerables variables.', '{% data variables.')
  }

  // We have seen a lot of Markdown tables, that may have Liquid tags
  // (like `{% ifversion ... %}`) within them, lose the linebreak between
  // the heading and the first row marker.
  // For example:
  //
  //    | **Sprache** | **Ökosystem** | **Manifestdatei** | **Unterstützter Abhängigkeitsbereich** | |:---|:---:|:---:|:---|{% ifversion dep
  //
  // The equivalent English for that is:
  //
  //    | **Language** | **Ecosystem** | **Manifest file** | **Dependency scope supported** |
  //    |:---|:---:|:---:|:---|
  //    {%- ifversion dependency-graph-dart-support %}
  //
  // Let's inject these newline characters if found in the English content.
  if (corrected.includes('| |:---|:') && englishContent.includes('|\n|:---|')) {
    corrected = corrected.replaceAll('| |:---|:', '|\n|:---|:')
  }
  if (
    corrected.includes('|:---|{% ifversion') &&
    englishContent.includes('|:---|\n{%- ifversion')
  ) {
    corrected = corrected.replaceAll('|:---|{% ifversion', '|:---|\n{%- ifversion')
  }

  if (context.dottedPath === 'reusables.copilot.differences-cfi-cfb-table') {
    // As of Dec 2023, the French translation has a subtle typo.
    // This string replace is highly specific and clearly only going to
    // work if the exact French, incorrect, translation is still in use.
    // Consider deleting these lines in mid-2024 because hopefully by then
    // the translation mistake, which we've reported, will be corrected.
    corrected = corrected.replace(
      '{% data variables.copilot.cfi_price_per_month %} par utilisateur et par mois',
      '{% data variables.copilot.cfb_price_per_month %} par utilisateur et par mois.',
    )
  }

  corrected = applyLiteralReplacements(corrected, COMMON_TAG_REPLACEMENTS)
  corrected = restoreLiquidLinebreaks(corrected, englishContent)

  // All too often we see translations that look like this:
  //
  //   | Qualifizierer | Beschreibung | | -------- | -------- | {% ifversion ghec or ghes > 3.8 %} | `advanced-security:enabled` | Zeigt Repositorys an, ... | {% endif %} | `code-scanning-pull-request-alerts:enabled`| Zeigt Repositorys an, ... | | `dependabot-security-updates:enabled` | ... | {% endif %}
  //
  // Yes, that's one very long line. Notice how all the necessary linebreaks
  // are suddenly gone.
  corrected = corrected.replaceAll(' | | ', ' |\n| ')

  // The table repair below is quite a broad solution specifically around
  // any Markdown table syntax, so let's be extra careful and only apply it
  // to the select few pages with known problems.
  if (context.relativePath?.endsWith('scopes-for-oauth-apps.md')) {
    corrected = repairOAuthScopesTable(corrected, context.code)
  }

  if (context.relativePath === COMMIT_SIGNATURE_PAGE) {
    corrected = removeRetiredKeyFingerprint(corrected)
  }

  logRemainingDoubleBraceTags(corrected, context)

  return corrected
}