File size: 3,407 Bytes
5ec2e9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import { Innertube } from "youtubei.js";
import type { CaptionTrackData } from "youtubei.js/PlayerCaptionsTracklist";
import { HTTPException } from "hono/http-exception";

/**
 * Wraps a millisecond count in a `Temporal.Duration`.
 *
 * The `Temporal.Duration` constructor takes its units positionally, so the
 * seven units that precede milliseconds are passed explicitly as zero.
 * The value is stored as given: no balancing into larger units happens here.
 *
 * @param milliseconds Length of the duration in milliseconds.
 * @returns A duration whose only non-zero field is `milliseconds`.
 */
function createTemporalDuration(milliseconds: number) {
    return new Temporal.Duration(
        0, // years
        0, // months
        0, // weeks
        0, // days
        0, // hours
        0, // minutes
        0, // seconds
        milliseconds,
    );
}

/**
 * Characters that must not appear literally in WebVTT cue text, mapped to the
 * character reference that replaces them.
 *
 * The ampersand has to be escaped as well: a raw "&" in cue text would be read
 * as the start of a character reference by a WebVTT parser.
 */
const ESCAPE_SUBSTITUTIONS = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    "\u200E": "&lrm;", // left-to-right mark
    "\u200F": "&rlm;", // right-to-left mark
    "\u00A0": "&nbsp;", // no-break space
};

/**
 * Matches the characters that have an entry in ESCAPE_SUBSTITUTIONS.
 *
 * Written with explicit escapes only, so no invisible character can end up in
 * the class: a character matched here without a table entry would make the
 * replacer return `undefined`, which `String.prototype.replace` inserts as the
 * literal text "undefined".
 */
const CUE_TEXT_ESCAPE_PATTERN = /[&<>\u200E\u200F\u00A0]/g;

/**
 * Formatting options for cue timestamps. They follow the options of
 * Intl.DurationFormat, not Intl.DateTimeFormatOptions (see formatCueTimestamp).
 */
const CUE_TIMESTAMP_FORMAT_OPTIONS = {
    style: "digital",
    minutesDisplay: "always",
    fractionalDigits: 3,
};

/** Replaces every character listed in ESCAPE_SUBSTITUTIONS with its entity. */
function escapeCueText(text: string): string {
    const substitutions: Readonly<Record<string, string | undefined>> =
        ESCAPE_SUBSTITUTIONS;
    // Fall back to the matched character so a missing table entry can never
    // inject the text "undefined" into a cue.
    return text.replace(
        CUE_TEXT_ESCAPE_PATTERN,
        (match: string) => substitutions[match] ?? match,
    );
}

/** Formats a millisecond offset as the timestamp used on a cue timing line. */
function formatCueTimestamp(
    milliseconds: number,
    relativeTo: Temporal.PlainDateTime,
): string {
    // Temporal.Duration.prototype.toLocaleString() is supposed to delegate to Intl.DurationFormat,
    // which Deno does not support. However, instead of following the spec and having toLocaleString
    // return the same value as toString(), Deno seems to have its own implementation of
    // Intl.DurationFormat, with its options parameter type incorrectly restricted to
    // Intl.DateTimeFormatOptions even though the two do not share the same arguments.
    //
    // CUE_TIMESTAMP_FORMAT_OPTIONS matches the options parameter of Intl.DurationFormat, and the
    // resulting output is as expected. Until this is fixed, typechecking must be disabled for the
    // toLocaleString call below.
    //
    // See
    // https://docs.deno.com/api/web/~/Intl.DateTimeFormatOptions
    // https://docs.deno.com/api/web/~/Temporal.Duration.prototype.toLocaleString
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Temporal/Duration/toLocaleString
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/DurationFormat/DurationFormat
    const balanced = createTemporalDuration(milliseconds).round({
        largestUnit: "year",
        relativeTo,
    });
    //@ts-ignore see above
    return balanced.toLocaleString("en-US", CUE_TIMESTAMP_FORMAT_OPTIONS);
}

/**
 * Builds a WebVTT document from the transcript of a video.
 *
 * Fetches the video info, selects the transcript language named by
 * `selectedCaption`, and emits one cue per transcript segment: a
 * `start --> end` timing line followed by the escaped segment text.
 *
 * @param innertubeClient Client used to fetch the video info and transcript.
 * @param videoId ID of the video whose transcript is requested.
 * @param selectedCaption Caption track whose `name.text` selects the transcript
 *   language; an empty string is used when the name has no text.
 * @returns The document text: a "WEBVTT" header and the cues, separated by
 *   blank lines.
 * @throws HTTPException with status 404 when the transcript has no initial
 *   segments.
 */
export async function handleTranscripts(
    innertubeClient: Innertube,
    videoId: string,
    selectedCaption: CaptionTrackData,
): Promise<string> {
    const info = await innertubeClient.getInfo(videoId);
    const transcript = await info.getTranscript();
    const transcriptInfo = await transcript.selectLanguage(
        selectedCaption.name.text || "",
    );
    const rawTranscriptLines = transcriptInfo.transcript.content?.body
        ?.initial_segments;

    if (rawTranscriptLines == undefined) throw new HTTPException(404);

    // Reference point required by Duration.round() when balancing up to
    // calendar units; identical for every cue, so it is created once.
    const relativeTo = Temporal.PlainDateTime.from("2022-01-01");

    const lines: string[] = ["WEBVTT"];
    for (const line of rawTranscriptLines) {
        const start = formatCueTimestamp(Number(line.start_ms), relativeTo);
        const end = formatCueTimestamp(Number(line.end_ms), relativeTo);
        const text = escapeCueText(line.snippet?.text || "");

        lines.push(`${start} --> ${end}\n${text}`);
    }

    // Blocks of a WebVTT file are separated by a blank line.
    return lines.join("\n\n");
}