File size: 2,321 Bytes
9858829 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | -- Query to fetch comments with all required information
-- Includes: comments, parent comments, timestamps, IDs, channel info, and content description
-- Excludes comments that have already been processed (present in ML_FEATURES table)
SELECT
fc.COMMENT_SK,
fc.COMMENT_ID,
fc.PLATFORM,
fc.MESSAGE AS COMMENT_TEXT,
fc.CREATED_TIME AS COMMENT_TIMESTAMP,
fc.AUTHOR_NAME,
fc.AUTHOR_ID,
fc.LIKE_COUNT,
fc.PARENT_COMMENT_ID,
fc.REPLIES_COUNT,
fc.COMMENT_LENGTH,
fc.IS_ACTIVE AS COMMENT_IS_ACTIVE,
-- Parent comment information (self-join to get parent comment text)
parent_fc.MESSAGE AS PARENT_COMMENT_TEXT,
-- Content information (content description is in MESSAGE column)
dc.CONTENT_SK,
dc.CONTENT_ID,
dc.CONTENT_TYPE,
dc.MESSAGE AS CONTENT_DESCRIPTION,
dc.TITLE AS CONTENT_TITLE,
dc.PERMALINK_URL,
dc.CREATED_TIME AS CONTENT_TIMESTAMP,
dc.SHARES_COUNT,
dc.REACTIONS_TOTAL,
dc.COMMENTS_TOTAL,
dc.HASHTAGS,
-- Channel information
dch.CHANNEL_SK,
dch.CHANNEL_NAME,
dch.CHANNEL_DISPLAY_NAME,
dch.PAGE_ID,
dch.CHANNEL_URL,
dch.IS_ACTIVE AS CHANNEL_IS_ACTIVE
FROM
SOCIAL_MEDIA_DB.CORE.FACT_COMMENTS fc
-- Left join to get parent comment text if it exists
LEFT JOIN
SOCIAL_MEDIA_DB.CORE.FACT_COMMENTS parent_fc
ON fc.PARENT_COMMENT_ID = parent_fc.COMMENT_ID
AND fc.PLATFORM = parent_fc.PLATFORM
INNER JOIN
SOCIAL_MEDIA_DB.CORE.DIM_CONTENT dc
ON fc.CONTENT_SK = dc.CONTENT_SK
INNER JOIN
SOCIAL_MEDIA_DB.CORE.DIM_CHANNEL dch
ON dc.CHANNEL_NAME = dch.CHANNEL_NAME
AND dc.PLATFORM = dch.PLATFORM
LEFT JOIN
SOCIAL_MEDIA_DB.ML_FEATURES.COMMENT_SENTIMENT_FEATURES csf
ON fc.COMMENT_SK = csf.COMMENT_SK
WHERE
fc.IS_ACTIVE = TRUE
AND dc.IS_ACTIVE = TRUE
AND dch.IS_ACTIVE = TRUE
AND (fc.AUTHOR_NAME IS NULL OR fc.AUTHOR_NAME NOT IN ('Musora', 'Drumeo', 'Pianote', '@PianoteOfficial', '@DrumeoOfficial', '@MusoraOfficial'))
AND csf.COMMENT_SK IS NULL -- Exclude comments already processed
AND fc.MESSAGE IS NOT NULL -- Exclude NULL comments
AND TRIM(fc.MESSAGE) != '' -- Exclude empty or whitespace-only comments
AND LENGTH(TRIM(fc.MESSAGE)) > 0 -- Double-check for non-empty content
ORDER BY
fc.CREATED_TIME DESC |