File size: 2,321 Bytes
9858829
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
-- Query to fetch comments with all required information
-- Includes: comments, parent comments, timestamps, IDs, channel info, and content description
-- Excludes comments that have already been processed (present in ML_FEATURES table)

SELECT
    fc.COMMENT_SK,
    fc.COMMENT_ID,
    fc.PLATFORM,
    fc.MESSAGE AS COMMENT_TEXT,
    fc.CREATED_TIME AS COMMENT_TIMESTAMP,
    fc.AUTHOR_NAME,
    fc.AUTHOR_ID,
    fc.LIKE_COUNT,
    fc.PARENT_COMMENT_ID,
    fc.REPLIES_COUNT,
    fc.COMMENT_LENGTH,
    fc.IS_ACTIVE AS COMMENT_IS_ACTIVE,

    -- Parent comment information (self-join to get parent comment text)
    parent_fc.MESSAGE AS PARENT_COMMENT_TEXT,

    -- Content information (content description is in MESSAGE column)
    dc.CONTENT_SK,
    dc.CONTENT_ID,
    dc.CONTENT_TYPE,
    dc.MESSAGE AS CONTENT_DESCRIPTION,
    dc.TITLE AS CONTENT_TITLE,
    dc.PERMALINK_URL,
    dc.CREATED_TIME AS CONTENT_TIMESTAMP,
    dc.SHARES_COUNT,
    dc.REACTIONS_TOTAL,
    dc.COMMENTS_TOTAL,
    dc.HASHTAGS,

    -- Channel information
    dch.CHANNEL_SK,
    dch.CHANNEL_NAME,
    dch.CHANNEL_DISPLAY_NAME,
    dch.PAGE_ID,
    dch.CHANNEL_URL,
    dch.IS_ACTIVE AS CHANNEL_IS_ACTIVE

FROM
    SOCIAL_MEDIA_DB.CORE.FACT_COMMENTS fc

-- Left join to get parent comment text if it exists
LEFT JOIN
    SOCIAL_MEDIA_DB.CORE.FACT_COMMENTS parent_fc
    ON fc.PARENT_COMMENT_ID = parent_fc.COMMENT_ID
    AND fc.PLATFORM = parent_fc.PLATFORM

INNER JOIN
    SOCIAL_MEDIA_DB.CORE.DIM_CONTENT dc
    ON fc.CONTENT_SK = dc.CONTENT_SK

INNER JOIN
    SOCIAL_MEDIA_DB.CORE.DIM_CHANNEL dch
    ON dc.CHANNEL_NAME = dch.CHANNEL_NAME
    AND dc.PLATFORM = dch.PLATFORM

LEFT JOIN
    SOCIAL_MEDIA_DB.ML_FEATURES.COMMENT_SENTIMENT_FEATURES csf
    ON fc.COMMENT_SK = csf.COMMENT_SK

WHERE
    fc.IS_ACTIVE = TRUE
    AND dc.IS_ACTIVE = TRUE
    AND dch.IS_ACTIVE = TRUE
    AND (fc.AUTHOR_NAME IS NULL OR fc.AUTHOR_NAME NOT IN ('Musora', 'Drumeo', 'Pianote', '@PianoteOfficial', '@DrumeoOfficial', '@MusoraOfficial'))
    AND csf.COMMENT_SK IS NULL  -- Exclude comments already processed
    AND fc.MESSAGE IS NOT NULL  -- Exclude NULL comments
    AND TRIM(fc.MESSAGE) != ''  -- Exclude empty or whitespace-only comments
    AND LENGTH(TRIM(fc.MESSAGE)) > 0  -- Double-check for non-empty content

ORDER BY
    fc.CREATED_TIME DESC