Spaces:
Sleeping
Sleeping
| WITH dedup_master_files AS ( | |
| SELECT * | |
| FROM ( | |
| SELECT | |
| *, | |
| LOWER( | |
| REGEXP_REPLACE(original_filename, r'^(Copy of\s*)+', '') | |
| ) AS cleaned_filename, | |
| ROW_NUMBER() OVER ( | |
| PARTITION BY original_filename | |
| ORDER BY created_at DESC | |
| ) AS rn | |
| FROM `stellar-orb-451904-d9.raw_data_ocr_kia.master_files` | |
| ) | |
| WHERE rn = 1 | |
| ), | |
| dedup_page1_cover AS ( | |
| SELECT * | |
| FROM ( | |
| SELECT | |
| *, | |
| ROW_NUMBER() OVER ( | |
| PARTITION BY file_id | |
| ORDER BY created_at DESC | |
| ) AS rn | |
| FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page1_cover` | |
| ) | |
| WHERE rn = 1 | |
| ), | |
| dedup_page2_identitas AS ( | |
| SELECT * | |
| FROM ( | |
| SELECT | |
| *, | |
| ROW_NUMBER() OVER ( | |
| PARTITION BY file_id | |
| ORDER BY inserted_at DESC | |
| ) AS rn | |
| FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page2_identitas` | |
| ) | |
| WHERE rn = 1 | |
| ), | |
| label AS ( | |
| SELECT | |
| filename AS filename_label, | |
| project_id_mother AS project_id_mother_label, | |
| CASE | |
| WHEN first_name IS NOT NULL | |
| AND last_name IS NOT NULL | |
| AND first_name = last_name | |
| THEN first_name | |
| ELSE CONCAT( | |
| IFNULL(first_name, ''), | |
| IFNULL( | |
| CONCAT(' ', last_name), | |
| '' | |
| ) | |
| ) | |
| END AS nama_ibu_cover_label, | |
| puskesmas_name AS dikeluarkan_oleh_fasilitas_kesehatan_label, | |
| regency_name AS kabupaten_kota_label, | |
| CASE | |
| WHEN first_name IS NOT NULL | |
| AND last_name IS NOT NULL | |
| AND first_name = last_name | |
| THEN first_name | |
| ELSE CONCAT( | |
| IFNULL(first_name, ''), | |
| IFNULL( | |
| CONCAT(' ', last_name), | |
| '' | |
| ) | |
| ) | |
| END AS nama_ibu, | |
| nik_mother AS nik_ibu_label, | |
| birth_date AS tempat_tanggal_lahir_ibu_label, | |
| address_street AS alamat_rumah_ibu_label, | |
| contact_number AS telepon_ibu_label, | |
| bpjs_mother AS no_jkn_ibu_label, | |
| education_level AS pendidikan_ibu_label, | |
| occupation AS pekerjaan_ibu_label, | |
| blood_type_result AS golongan_darah_ibu_label, | |
| age AS usia_ibu_label, | |
| pregnancy_number AS kehamilan_ke_label, | |
| number_live_birth AS jumlah_anak_lahir_hidup_label, | |
| number_birth_lost AS riwayat_keguguran_label, | |
| previous_preg_issue AS riwayat_penyakit_ibu_label | |
| FROM ( | |
| SELECT | |
| *, | |
| ROW_NUMBER() OVER ( | |
| PARTITION BY `filename` | |
| ORDER BY `end` DESC | |
| ) AS rn | |
| FROM `stellar-orb-451904-d9.ocr_bukukia_ocr_sid.List Correct Entry 2025` | |
| ) | |
| WHERE | |
| rn = 1 | |
| ) | |
| , | |
| main AS ( | |
| SELECT | |
| MF.id, | |
| MF.cleaned_filename AS `filename`, | |
| REGEXP_EXTRACT(MF.original_filename, r'(7000\d{4,})') AS project_id_mother, | |
| P1.nama_ibu_cover, | |
| P1.dikeluarkan_oleh_fasilitas_kesehatan, | |
| P1.kabupaten_kota, | |
| P2.nama_ibu, | |
| P2.nik_ibu, | |
| P2.tempat_tanggal_lahir_ibu, | |
| P2.alamat_rumah_ibu, | |
| P2.telepon_ibu, | |
| P2.no_jkn_ibu, | |
| P2.pendidikan_ibu, | |
| P2.pekerjaan_ibu, | |
| P2.golongan_darah_ibu, | |
| P2.usia_ibu, | |
| P2.kehamilan_ke, | |
| P2.jumlah_anak_lahir_hidup, | |
| P2.riwayat_keguguran, | |
| P2.riwayat_penyakit_ibu, | |
| L.*, | |
| ROW_NUMBER() OVER ( | |
| PARTITION BY MF.`cleaned_filename` | |
| ORDER BY MF.`cleaned_filename` DESC | |
| ) AS rn | |
| FROM dedup_master_files MF | |
| LEFT JOIN dedup_page1_cover P1 ON MF.id = P1.id | |
| LEFT JOIN dedup_page2_identitas P2 ON MF.id = P2.id | |
| LEFT JOIN label L ON CAST(REGEXP_EXTRACT(MF.original_filename, r'(7000\d{4,})') AS INTEGER) = L.project_id_mother_label | |
| ) | |
| SELECT * | |
| FROM main | |
| WHERE | |
| rn = 1 and | |
| id != "9a21ef00-6b02-4b04-81c3-68e25e2c8b7f" --duplicated | |