WITH dedup_master_files AS ( SELECT * FROM ( SELECT *, LOWER( REGEXP_REPLACE(original_filename, r'^(Copy of\s*)+', '') ) AS cleaned_filename, ROW_NUMBER() OVER ( PARTITION BY original_filename ORDER BY created_at DESC ) AS rn FROM `stellar-orb-451904-d9.raw_data_ocr_kia.master_files` ) WHERE rn = 1 ), dedup_page1_cover AS ( SELECT * FROM ( SELECT *, ROW_NUMBER() OVER ( PARTITION BY file_id ORDER BY created_at DESC ) AS rn FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page1_cover` ) WHERE rn = 1 ), dedup_page2_identitas AS ( SELECT * FROM ( SELECT *, ROW_NUMBER() OVER ( PARTITION BY file_id ORDER BY inserted_at DESC ) AS rn FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page2_identitas` ) WHERE rn = 1 ), label AS ( SELECT filename AS filename_label, project_id_mother AS project_id_mother_label, CASE WHEN first_name IS NOT NULL AND last_name IS NOT NULL AND first_name = last_name THEN first_name ELSE CONCAT( IFNULL(first_name, ''), IFNULL( CONCAT(' ', last_name), '' ) ) END AS nama_ibu_cover_label, puskesmas_name AS dikeluarkan_oleh_fasilitas_kesehatan_label, regency_name AS kabupaten_kota_label, CASE WHEN first_name IS NOT NULL AND last_name IS NOT NULL AND first_name = last_name THEN first_name ELSE CONCAT( IFNULL(first_name, ''), IFNULL( CONCAT(' ', last_name), '' ) ) END AS nama_ibu, nik_mother AS nik_ibu_label, birth_date AS tempat_tanggal_lahir_ibu_label, address_street AS alamat_rumah_ibu_label, contact_number AS telepon_ibu_label, bpjs_mother AS no_jkn_ibu_label, education_level AS pendidikan_ibu_label, occupation AS pekerjaan_ibu_label, blood_type_result AS golongan_darah_ibu_label, age AS usia_ibu_label, pregnancy_number AS kehamilan_ke_label, number_live_birth AS jumlah_anak_lahir_hidup_label, number_birth_lost AS riwayat_keguguran_label, previous_preg_issue AS riwayat_penyakit_ibu_label FROM ( SELECT *, ROW_NUMBER() OVER ( PARTITION BY `filename` ORDER BY `end` DESC ) AS rn FROM `stellar-orb-451904-d9.ocr_bukukia_ocr_sid.List Correct Entry 2025` ) WHERE rn = 1 ) , main AS ( SELECT MF.id, MF.cleaned_filename AS `filename`, REGEXP_EXTRACT(MF.original_filename, r'(7000\d{4,})') AS project_id_mother, P1.nama_ibu_cover, P1.dikeluarkan_oleh_fasilitas_kesehatan, P1.kabupaten_kota, P2.nama_ibu, P2.nik_ibu, P2.tempat_tanggal_lahir_ibu, P2.alamat_rumah_ibu, P2.telepon_ibu, P2.no_jkn_ibu, P2.pendidikan_ibu, P2.pekerjaan_ibu, P2.golongan_darah_ibu, P2.usia_ibu, P2.kehamilan_ke, P2.jumlah_anak_lahir_hidup, P2.riwayat_keguguran, P2.riwayat_penyakit_ibu, L.*, ROW_NUMBER() OVER ( PARTITION BY MF.`cleaned_filename` ORDER BY MF.`cleaned_filename` DESC ) AS rn FROM dedup_master_files MF LEFT JOIN dedup_page1_cover P1 ON MF.id = P1.id LEFT JOIN dedup_page2_identitas P2 ON MF.id = P2.id LEFT JOIN label L ON CAST(REGEXP_EXTRACT(MF.original_filename, r'(7000\d{4,})') AS INTEGER) = L.project_id_mother_label ) SELECT * FROM main WHERE rn = 1 and id != "9a21ef00-6b02-4b04-81c3-68e25e2c8b7f" --duplicated