-- Query to fetch comments with all required information -- Includes: comments, parent comments, timestamps, IDs, channel info, and content description -- Excludes comments that have already been processed (present in ML_FEATURES table) SELECT fc.COMMENT_SK, fc.COMMENT_ID, fc.PLATFORM, fc.MESSAGE AS COMMENT_TEXT, fc.CREATED_TIME AS COMMENT_TIMESTAMP, fc.AUTHOR_NAME, fc.AUTHOR_ID, fc.LIKE_COUNT, fc.PARENT_COMMENT_ID, fc.REPLIES_COUNT, fc.COMMENT_LENGTH, fc.IS_ACTIVE AS COMMENT_IS_ACTIVE, -- Parent comment information (self-join to get parent comment text) parent_fc.MESSAGE AS PARENT_COMMENT_TEXT, -- Content information (content description is in MESSAGE column) dc.CONTENT_SK, dc.CONTENT_ID, dc.CONTENT_TYPE, dc.MESSAGE AS CONTENT_DESCRIPTION, dc.TITLE AS CONTENT_TITLE, dc.PERMALINK_URL, dc.CREATED_TIME AS CONTENT_TIMESTAMP, dc.SHARES_COUNT, dc.REACTIONS_TOTAL, dc.COMMENTS_TOTAL, dc.HASHTAGS, -- Channel information dch.CHANNEL_SK, dch.CHANNEL_NAME, dch.CHANNEL_DISPLAY_NAME, dch.PAGE_ID, dch.CHANNEL_URL, dch.IS_ACTIVE AS CHANNEL_IS_ACTIVE FROM SOCIAL_MEDIA_DB.CORE.FACT_COMMENTS fc -- Left join to get parent comment text if it exists LEFT JOIN SOCIAL_MEDIA_DB.CORE.FACT_COMMENTS parent_fc ON fc.PARENT_COMMENT_ID = parent_fc.COMMENT_ID AND fc.PLATFORM = parent_fc.PLATFORM INNER JOIN SOCIAL_MEDIA_DB.CORE.DIM_CONTENT dc ON fc.CONTENT_SK = dc.CONTENT_SK INNER JOIN SOCIAL_MEDIA_DB.CORE.DIM_CHANNEL dch ON dc.CHANNEL_NAME = dch.CHANNEL_NAME AND dc.PLATFORM = dch.PLATFORM LEFT JOIN SOCIAL_MEDIA_DB.ML_FEATURES.COMMENT_SENTIMENT_FEATURES csf ON fc.COMMENT_SK = csf.COMMENT_SK WHERE fc.IS_ACTIVE = TRUE AND dc.IS_ACTIVE = TRUE AND dch.IS_ACTIVE = TRUE AND (fc.AUTHOR_NAME IS NULL OR fc.AUTHOR_NAME NOT IN ('Musora', 'Drumeo', 'Pianote', '@PianoteOfficial', '@DrumeoOfficial', '@MusoraOfficial')) AND csf.COMMENT_SK IS NULL -- Exclude comments already processed AND fc.MESSAGE IS NOT NULL -- Exclude NULL comments AND TRIM(fc.MESSAGE) != '' -- Exclude empty or whitespace-only comments AND LENGTH(TRIM(fc.MESSAGE)) > 0 -- Double-check for non-empty content ORDER BY fc.CREATED_TIME DESC