Spaces:
Sleeping
Sleeping
File size: 3,527 Bytes
34844c6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
WITH dedup_master_files AS (
SELECT *
FROM (
SELECT
*,
LOWER(
REGEXP_REPLACE(original_filename, r'^(Copy of\s*)+', '')
) AS cleaned_filename,
ROW_NUMBER() OVER (
PARTITION BY original_filename
ORDER BY created_at DESC
) AS rn
FROM `stellar-orb-451904-d9.raw_data_ocr_kia.master_files`
)
WHERE rn = 1
),
dedup_page1_cover AS (
SELECT *
FROM (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY file_id
ORDER BY created_at DESC
) AS rn
FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page1_cover`
)
WHERE rn = 1
),
dedup_page2_identitas AS (
SELECT *
FROM (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY file_id
ORDER BY inserted_at DESC
) AS rn
FROM `stellar-orb-451904-d9.raw_data_ocr_kia.page2_identitas`
)
WHERE rn = 1
),
label AS (
SELECT
filename AS filename_label,
project_id_mother AS project_id_mother_label,
CASE
WHEN first_name IS NOT NULL
AND last_name IS NOT NULL
AND first_name = last_name
THEN first_name
ELSE CONCAT(
IFNULL(first_name, ''),
IFNULL(
CONCAT(' ', last_name),
''
)
)
END AS nama_ibu_cover_label,
puskesmas_name AS dikeluarkan_oleh_fasilitas_kesehatan_label,
regency_name AS kabupaten_kota_label,
CASE
WHEN first_name IS NOT NULL
AND last_name IS NOT NULL
AND first_name = last_name
THEN first_name
ELSE CONCAT(
IFNULL(first_name, ''),
IFNULL(
CONCAT(' ', last_name),
''
)
)
END AS nama_ibu,
nik_mother AS nik_ibu_label,
birth_date AS tempat_tanggal_lahir_ibu_label,
address_street AS alamat_rumah_ibu_label,
contact_number AS telepon_ibu_label,
bpjs_mother AS no_jkn_ibu_label,
education_level AS pendidikan_ibu_label,
occupation AS pekerjaan_ibu_label,
blood_type_result AS golongan_darah_ibu_label,
age AS usia_ibu_label,
pregnancy_number AS kehamilan_ke_label,
number_live_birth AS jumlah_anak_lahir_hidup_label,
number_birth_lost AS riwayat_keguguran_label,
previous_preg_issue AS riwayat_penyakit_ibu_label
FROM (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY `filename`
ORDER BY `end` DESC
) AS rn
FROM `stellar-orb-451904-d9.ocr_bukukia_ocr_sid.List Correct Entry 2025`
)
WHERE
rn = 1
)
,
main AS (
SELECT
MF.id,
MF.cleaned_filename AS `filename`,
REGEXP_EXTRACT(MF.original_filename, r'(7000\d{4,})') AS project_id_mother,
P1.nama_ibu_cover,
P1.dikeluarkan_oleh_fasilitas_kesehatan,
P1.kabupaten_kota,
P2.nama_ibu,
P2.nik_ibu,
P2.tempat_tanggal_lahir_ibu,
P2.alamat_rumah_ibu,
P2.telepon_ibu,
P2.no_jkn_ibu,
P2.pendidikan_ibu,
P2.pekerjaan_ibu,
P2.golongan_darah_ibu,
P2.usia_ibu,
P2.kehamilan_ke,
P2.jumlah_anak_lahir_hidup,
P2.riwayat_keguguran,
P2.riwayat_penyakit_ibu,
L.*,
ROW_NUMBER() OVER (
PARTITION BY MF.`cleaned_filename`
ORDER BY MF.`cleaned_filename` DESC
) AS rn
FROM dedup_master_files MF
LEFT JOIN dedup_page1_cover P1 ON MF.id = P1.id
LEFT JOIN dedup_page2_identitas P2 ON MF.id = P2.id
LEFT JOIN label L ON CAST(REGEXP_EXTRACT(MF.original_filename, r'(7000\d{4,})') AS INTEGER) = L.project_id_mother_label
)
SELECT *
FROM main
WHERE
rn = 1 and
id != "9a21ef00-6b02-4b04-81c3-68e25e2c8b7f" --duplicated
|