exorcist123 committed on
Commit
438de50
·
1 Parent(s): 4904b99

add visa scraper

Browse files
Files changed (1) hide show
  1. visa_scraper.py +505 -0
visa_scraper.py ADDED
@@ -0,0 +1,505 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # visa_scraper.py
2
+ import requests
3
+ from typing import Dict, List, Optional, Any
4
+ from dataclasses import dataclass
5
+ import json
6
+ from bs4 import BeautifulSoup, NavigableString
7
+
8
@dataclass
class VisaInfo:
    """Record describing a single visa type.

    Only ``id`` and ``name`` are required; every other field is optional and
    falls back to ``None`` (text fields) or ``False`` (flags).
    """

    # --- required identification ---
    id: str
    name: str

    # --- optional descriptive text ---
    code: Optional[str] = None
    description: Optional[str] = None
    stay_duration: Optional[str] = None
    duration_time: Optional[str] = None

    # --- boolean flags, all off by default ---
    is_multiple_entry: bool = False
    is_arrival: bool = False
    is_guarantor: bool = False

    # --- optional pricing / validity / detail payloads ---
    cost: Optional[str] = None
    validity: Optional[str] = None
    requirements: Optional[Dict] = None
    detailed_info: Optional[str] = None
24
+
25
class IndonesianVisaScraper:
    """
    Scraper for Indonesian visa information from evisa.imigrasi.go.id

    The site exposes a single form-encoded POST endpoint (``BASE_URL``) whose
    behaviour is selected by a ``step`` field:

    * step ``"0"`` - sub-activities of a parent activity
    * step ``"1"`` - visa types for a sub-activity and a country
    * step ``"2"`` - details of one visa type

    The network-facing methods never raise: on any failure they print a short
    error line and return ``None``.
    """

    BASE_URL = "https://evisa.imigrasi.go.id/web/visa-selection/data"

    # Seconds to wait for the server before giving up. Without a timeout
    # ``requests`` waits indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Message returned when the API reports an "empty" visa-type result.
    _GUARANTOR_MESSAGE = "This type of visa must be applied by guarantor."

    # Country ID mapping
    COUNTRY_MAPPING = {
        "ALBANIA": "df26b6b5-b957-44fc-8775-5a307aff676c",
        "ALGERIA": "ee9a47a3-c229-4384-a00e-c5d3132a2b6a",
        "AMERICAN SAMOA": "f57b1c29-b107-402d-a33c-2198d61dee4a",
        "ANDORRA": "7fc48e74-1a09-4edd-9bca-d6622ea82b1e",
        "ANGOLA": "2cd8b0e4-f9a4-44f5-8014-23f1f03d3193",
        "ANGUILLA": "7d9c632d-106a-40b5-b4de-d46572bdb18e",
        "ANTIGUA AND BARBUDA": "dab42168-cd9e-4e6f-86ec-a1a773d34684",
        "ARGENTINA": "7a0eaa53-353b-4206-9299-1badcdb0fdf1",
        "ARMENIA": "e7bcd2e1-fb4b-4748-8fce-b2d0e3b3827c",
        "AUSTRALIA": "46283cb1-d406-47c1-86fd-8c308ffa173a",
        "AUSTRIA": "727e690b-fc21-4b8a-b17f-1de1616433b7",
        "AZERBAIJAN": "a11144b1-8228-42d5-8242-a0f529c0de10",
        "BAHAMAS": "0685586c-7e37-4aa1-8020-f4aa453e9472",
        "BAHRAIN": "f3373174-008f-4f5c-b902-e79763719c64",
        "BANGLADESH": "a14de768-1e37-4d93-8def-352266de349a",
        "BARBADOS": "02054c21-3f67-4fe3-95bc-d20106985e49",
        "BELARUS": "9ff91215-8e06-45b4-8f88-f7685d249358",
        "BELGIUM": "94629b00-f4c4-46bd-b263-cc7c53f8b9d9",
        "BELIZE": "ada414e8-6fd5-4b69-82eb-89472e20d3be",
        "BENIN": "88a03b34-1b9e-4bd6-ba7b-2a7bd8838c24",
        "BERMUDA": "50425995-e990-47c5-8e5e-882840ef2b7a",
        "BHUTAN": "4528297b-9315-45f8-adf1-3a5f0b7e994f",
        "BOLIVIA": "bdc856ae-6948-4381-9648-8fc41bef7577",
        "BOSNIA AND HERZEGOVINA": "fa943b74-22e9-4650-bccb-14b466bde722",
        "BOTSWANA": "f98613a2-3816-4af6-8d27-e7edc2e67366",
        "BRAZIL": "cf4dfdbe-327b-4c9a-bd39-441a338cd276",
        "BRUNEI DARUSSALAM": "687653fc-236b-4bc8-baf8-88a81f17618a",
        "BULGARIA": "7345359f-4f82-4089-a153-acf32254b0a3",
        "BURKINA FASO": "83c3ceed-af1c-4b06-bb4a-0217712043c8",
        "BURUNDI": "ff78c621-fa7f-467b-812c-9912e88ac430",
        "CAMBODIA": "3f387ed5-13e6-42b8-ba77-84f281d995f7",
        "CAMEROON": "1d91caf5-bcf2-4e4b-8c77-7d62781bf220",
        "CANADA": "47db151f-ce3e-406d-b707-8fcdfd088a61",
        "CAPE VERDE": "3ecf1c2c-de95-4811-9e5e-4b03816a108a",
        "CENTRAL AFRICAN REPUBLIC": "0c72f1ee-2e31-4713-86ac-e079c0f994e7",
        "CHAD": "abbd5561-fde5-42e5-8237-382ab808ec8f",
        "CHILE": "0c597abe-9cf2-4171-8449-06bcc928afd8",
        "CHINA": "883891b6-1c3f-41c3-9279-4745f63575cb",
        "COLOMBIA": "76578a13-3e64-41da-8d91-deef561311c2",
        "COMOROS": "e350c59c-5236-4207-8ef9-88e4aebeda9d",
        "CONGO REPUBLIC": "41d426c7-b47f-4e73-a810-dc78bcaae82f",
        "COSTA RICA": "d16f1de8-e959-4569-89ab-0e165456eeb0",
        "CROATIA": "a93f74b3-fcc6-473c-89f9-0e7af7d7f6b6",
        "CUBA": "4f6e354a-ca46-49f3-b035-8188b2d1f6b6",
        "CYPRUS": "8e5508a3-0173-4be4-bdc8-23986b9179d7",
        "CZECH REPUBLIC": "318736c7-f14f-4560-a65a-8c2304ea6805",
        "DEMOCRATIC REPUBLIC OF CONGO": "7d5d09ba-f7a2-4780-a730-ac5749aeae88",
        "DENMARK": "25d567a3-7a4e-4248-9800-647028da9b71",
        "DJIBOUTI": "79c7ec19-d69c-48f4-9cbc-96110e24667d",
        "DOMINICA": "ac9f23a8-fd8e-4877-bab8-367ce26cbb06",
        "DOMINICAN REPUBLIC": "0df2afd4-1945-40c3-a3bb-bcc1fa8ab081",
        "ECUADOR": "c7339e3d-cbe9-4893-af6f-0bdc4b6ebd26",
        "EGYPT": "29a5479e-3e9a-4fe6-a4b2-705de1654fb7",
        "EL SALVADOR": "c20c9d1a-3d42-48c1-8033-12a837058ba6",
        "EQUATORIAL GUINEA": "70003abd-8f1e-4baa-b513-a406405c85bd",
        "ERITREA": "e9ab2043-0c97-4c52-b103-8118861898d0",
        "ESTONIA": "448fa696-e0ca-4e5f-89cd-671f04d48c9f",
        "ESWATINI": "033731b5-112f-49bc-a182-7b1429049dbf",
        "ETHIOPIA": "a0a10b9e-3157-4d65-84c3-e96bc8a99979",
        "FIJI": "67f91d7d-5166-44d3-b050-45fe4fa6bad7",
        "FINLAND": "52006b57-7b02-4902-b55d-0dadc577b75c",
        "FRANCE": "23e8665d-2f2c-4841-acf6-6a2cac358ed4",
        "GABON": "38381f24-d8fb-42e4-ba80-f0b1451e5577",
        "GAMBIA": "41d04ba0-6764-4b55-88ed-e1f776743128",
        "GEORGIA": "f851c74a-b9dc-4d9d-a99c-65b6a69afe95",
        "GERMANY": "2d7c66c1-9341-459c-83a8-8138766b133c",
        "GHANA": "012e51a0-4e9c-4dfd-8438-b4cc375fb0ed",
        "GREECE": "47aa18d9-ed29-4250-9206-6f0cdaec94ff",
        "GRENADA": "9d14a529-4116-449f-818f-538994ae14c8",
        "GUATEMALA": "0159ccc8-a34c-4a6d-846f-510296a43536",
        "GUINEA": "5cd3306f-e3de-4031-b1f4-fb5d07e4de70",
        "GUINEA-BISSAU": "e04e1e50-e2a8-4b11-97c3-f0c8d759af9a",
        "GUYANA": "77774317-99e1-43c3-ada2-62aaf1a22f81",
        "HAITI": "91d309f2-de3f-40bb-80e7-4b68077fdc33",
        "HONDURAS": "1fee433d-5093-4752-bd18-019be82ebcae",
        "HONG KONG SAR": "2ecd2c23-33ff-4624-80d3-900777e7801e",
        "HUNGARY": "5d2f2361-562a-4f59-84f7-a8dabb127add",
        "ICELAND": "e6ff784e-9de7-44a8-9f67-2c3beecd2efc",
        "INDIA": "f51f4255-1b0b-4a1b-be01-4bc82909c33e",
        # NOTE(review): INDONESIA and IRAN carry the same UUID; at least one
        # of them is presumably wrong - verify against the live country list.
        "INDONESIA": "66c0c992-6f20-4464-afa0-160e48985792",
        "IRAN": "66c0c992-6f20-4464-afa0-160e48985792",
        "IRAQ": "d106dd98-25c4-489d-ae37-a73bdf0b4b7d",
        "IRELAND": "43f85fba-b363-4c2e-aa27-83fb1a9da94c",
        "ITALY": "a24388f2-c0a5-4807-894c-de6823a79a3f",
        "IVORY COAST": "7dc9c73c-4c60-4ebc-bd41-13010e8c2952",
        "JAMAICA": "652d4cc5-c90a-44d0-866e-94167c3dbd3b",
        "JAPAN": "1beaa902-fae3-4733-984d-437375211a3d",
        "JORDAN": "4c108286-8eb9-48dc-9590-4a0a3cfe2b40",
        "KAZAKHSTAN": "4f68af57-24fc-4da3-9524-5a344f7ade7d",
        "KENYA": "009337db-a9ad-43af-b823-c95aca6798b9",
        "KUWAIT": "6d8dc2ec-dbc0-4416-a3e1-553935ee4f23",
        "KYRGYZSTAN": "6214e7cd-b344-4698-ae88-d32b80cc26f3",
        "LAOS": "50ebf46b-2f8e-4345-bc0c-461857b06847",
        "LATVIA": "af83f9ed-713c-4ae6-a588-c3c79fe58d44",
        "LEBANON": "477fc793-7ad7-4955-af53-09d9ffd197b8",
        "LESOTHO": "ceef5a50-b212-4f03-b132-1db4fa3ee8d9",
        "LIBYA": "a6f469cc-61ca-4640-a200-350e33630a7a",
        "LIECHTENSTEIN": "97dc8df9-6382-47c4-abcb-af92e0833ab3",
        "LITHUANIA": "f0127376-b8cc-49b6-ba94-90137d5c9912",
        "LUXEMBOURG": "b2ac4801-ca60-4fd1-9a09-9de023939a17",
        "MACAO SAR": "63ad0853-32f7-444a-94d3-1db1515f6411",
        "MACEDONIA": "ca2e8749-8c9c-48ea-8dbe-1d3733ffaf70",
        "MADAGASCAR": "22f9fd8e-f208-49c9-8afd-968066319f12",
        "MALAWI": "d2a7beda-5424-4d51-ae10-f37ca2f470c5",
        "MALAYSIA": "18ea95db-04a0-4564-a871-2d89bf9b6bd5",
        "MALDIVES": "96304e76-87c2-4ea2-979d-d1dd0a3a8e09",
        "MALI": "5e7032d6-4a29-412a-b1f0-6a88556862be",
        "MALTA": "71757f61-a945-4ab0-bccd-3eba9e29ce9b",
        "MAURITANIA": "b5ec75db-b92d-4b7e-ad2b-d97d132d00cb",
        "MAURITIUS": "a8c9190a-f11a-42f6-914e-32907a9b8c50",
        "MEXICO": "89b96bc1-e4ee-433c-953a-d5c5316ad3d8",
        "MOLDOVA": "e8548c39-a894-4283-8792-1c54b601bd13",
        "MONACO": "c17c385d-793f-42bf-be59-dd910d88e345",
        "MONGOLIA": "c9464187-3364-4455-9637-a4801beb1293",
        "MONTENEGRO": "b12556a9-7ee3-43b8-8630-7b99643d2419",
        "MOROCCO": "ae28d967-9143-4a3c-88c9-32ecda8a5f14",
        "MOZAMBIQUE": "aa50af6e-ffda-421a-8b07-bbd8376ffcbe",
        "MYANMAR": "63419316-d037-4edc-ab35-d3bd8da1487c",
        "NAMIBIA": "bb48769c-3071-42bb-813e-645f18b1ff0b",
        "NEPAL": "8da9858f-bdf2-4b6b-9429-238ebec039dd",
        "NETHERLANDS": "220ac96a-b9c9-4f77-8711-677004adaee2",
        "NEW ZEALAND": "ef268b73-8b9e-478f-a459-32058e33b1f3",
        "NICARAGUA": "6206cd29-800f-46b1-8a8d-a8cd70123281",
        "NIGER": "edc16d7a-71ac-45ac-8e22-315f2ec89d96",
        # NOTE(review): NIGERIA and NORTH MACEDONIA carry the same UUID (and
        # a separate "MACEDONIA" entry exists above); at least one of them is
        # presumably wrong - verify against the live country list.
        "NIGERIA": "f565a323-3bae-4dcb-b32e-95c6f1819c0f",
        "NORTH MACEDONIA": "f565a323-3bae-4dcb-b32e-95c6f1819c0f",
        "NORWAY": "f322b0c9-61a5-4ee8-9cdb-f046a5245e6b",
        "OMAN": "6aa75c1b-e7eb-43f8-88d7-1f0ad5764b43",
        "PAKISTAN": "d6adb2da-a3d0-4143-bcc5-1651b0671851",
        "PALAU": "90782348-c645-4400-84cb-2c4307e2d374",
        "PALESTINE": "e7b5ec63-4203-4668-a377-dd2b526682df",
        "PANAMA": "ec76dc7f-faca-4bfd-b76d-3de68e0cf280",
        "PAPUA NEW GUINEA": "275b19e4-50a1-41c7-b6d8-cfbbdb03d046",
        "PARAGUAY": "e58d82e1-a65c-410a-ac8a-866c6f25da9f",
        "PERU": "e5f942ed-23fd-4f2d-a5d4-97399753ec08",
        "PHILIPPINES": "8db55c27-d5c2-4cf7-bbaa-6db35fe3863d",
        "POLAND": "733b9eb2-8fea-49ab-9b7d-ba9d56ac0108",
        "PORTUGAL": "2fe14725-2c04-4c6a-963c-dcde840dcfd5",
        "QATAR": "ff4c5ef0-b98c-4316-93a1-401d0e503d23",
        "REPUBLIC OF KOREA": "0e63775f-7370-4203-9ff4-8a7ce1b83da5",
        "REPUBLIC OF SOUTH AFRICA": "89ee80a3-b660-44cc-b09d-9e8b02df327c",
        "ROMANIA": "cd630114-9710-49a8-a450-9eff8b9f7966",
        "RUSSIA": "c9927f33-3778-4955-a5ed-5be3fe3c2c86",
        "RWANDA": "fb80a5a2-7da4-4684-9a08-ac9935a7687c",
        "SAINT KITTS AND NEVIS": "652bdc10-1147-4da7-b87d-0a2d87f456ff",
        "SAINT LUCIA": "b72c2f44-6a70-44cc-92c9-e14f791ed3a0",
        "SAINT VINCENT AND GRENADINES": "57c54eb4-4317-4873-b20f-9eca9db38461",
        "SAMOA": "1ff2297d-ca8a-41d0-8ed3-835773306c91",
        "SAN MARINO": "1bd460aa-a6f7-4517-b557-68c9a5c4b575",
        "SAO TOME AND PRINCIPE": "a6945046-ca8d-4df0-9d85-76605233e7cd",
        "SAUDI ARABIA": "36cb6d47-cfd4-42a0-ac3d-398d76f932c3",
        "SENEGAL": "02b4584f-7c26-4b8f-8d07-7d79552045a7",
        "SERBIA": "77514d20-e823-4134-9be1-663279285570",
        "SEYCHELLES": "9a5bf6eb-bdd3-46ce-9298-325ee83c6e8a",
        "SIERRA LEONE": "536c5bbc-9d92-4831-856f-0b4183866560",
        "SINGAPORE": "76f0332d-ff00-4e69-acda-73d153c6ce66",
        "SLOVAKIA": "2c48b68b-5184-4d9f-9635-b4376855f832",
        "SLOVENIA": "95c29f1b-464b-4985-b5f2-9589e977cd65",
        "SOLOMON ISLANDS": "a6cc707d-cb28-4fff-aca0-8d7cb53e003b",
        "SOUTH SUDAN": "4a72d20d-8764-4f24-a6c5-5733637e0947",
        "SPAIN": "a7391e9c-7a1f-4184-bb40-b8d82b001643",
        "SRI LANKA": "c0d856c9-bac0-4566-aeb6-db30d16b611c",
        "SUDAN": "f5d95a4d-2857-44fe-a124-1c041aab463a",
        "SURINAME": "0697ba08-aea2-40cb-b54a-4dddf66cc568",
        "SWEDEN": "5285ce98-204f-42c7-9a8b-0ce793992540",
        "SWITZERLAND": "4b8a01ac-ebac-4157-ba96-52a42452b8fc",
        "SYRIA": "a4c2a6b9-599e-4f32-a2cb-a7a8c419d3fd",
        "TAIWAN": "cab7f051-6e1e-40e8-bc1d-304c7d718205",
        "TAJIKISTAN": "4e06dd54-cb40-4689-ba37-d52ee18b6232",
        "TANZANIA": "781d5b7f-8d72-4ab6-bb85-9d84c7002655",
        "THAILAND": "568938de-3757-4038-ab10-9515de53dcde",
        "TIMOR LESTE": "92991bf5-3eb4-4bcd-a9cd-eb3791f61855",
        "TOGO": "4472c324-c691-41ef-993f-1fdc68bdaeb4",
        "TONGA": "5da97514-d2f2-49e2-8b3c-c2cd837076c9",
        "TRINIDAD AND TOBAGO": "7052f57f-ed0f-4af7-a2bd-c56add55c58b",
        "TUNISIA": "d43d13b8-c1f6-4b99-96fc-46f8566cf2f8",
        "TURKEY": "ef010e72-e3d1-47af-81eb-c801725dfd56",
        "TURKMENISTAN": "d86a010c-3d78-4e3c-a0a2-a8c28ba2faa5",
        "UGANDA": "77291ff1-eab3-4a48-b4f8-a554edb33c54",
        "UKRAINE": "e01dfda0-5841-4395-8073-995bac33530c",
        "UNITED ARAB EMIRATES": "97ed438c-9faf-4f9c-a407-72e3718ca022",
        "UNITED KINGDOM": "94ce109e-b072-4229-9285-cf9cacc8fb2d",
        "UNITED STATES OF AMERICA": "9b23d98e-5f93-47c0-bc86-7c01185dc7a7",
        "USA": "9b23d98e-5f93-47c0-bc86-7c01185dc7a7",  # Alias
        "URUGUAY": "2b5383fd-857a-4d13-9da2-6cc8817d94e4",
        "UZBEKISTAN": "78743012-4d5f-499c-b1f7-87a30df735f9",
        "VANUATU": "402de03b-eba7-4fca-ab14-5a1c8efeeec7",
        "VENEZUELA": "41b2e501-26aa-4787-97fa-56af1e94b82c",
        "VIETNAM": "e3838f92-2f1d-449c-b6b8-76b089027f0a",
        "YEMEN": "e8324fd1-9cee-4772-97af-651408c0bab9",
        "ZAMBIA": "16543ea3-11bd-4551-9d9e-44abbec77c25",
        "ZIMBABWE": "e0ea4fa1-498e-4d95-89eb-24415907dcab",
    }

    # Parent Activity mapping (Step 0)
    PARENT_ACTIVITY_MAPPING = {
        "General, Family, or Social": "d5bc2168-2f4a-4396-8eae-3d895a0508e9",
        "Investment, Business, or Government": "f7a8ac1d-a71f-45d3-919f-985e295533f2",
        "Golden Visa": "63350ecf-72a1-4fd1-8674-d42f815615fd",
        "Diaspora": "58d2cbb5-423a-4f8e-8e3c-bcddd9f7980e",
        "Silver Hair & Retirement": "ad33081b-7c1d-4c07-a7ed-aa36f0b54bb3",
        "Second Home": "1bb683e9-bb81-4a85-9651-ba4d4174ff0e",
        "Professional or Employment": "40d4fdc7-4117-48ff-9ed6-3950088fc760",
        "Journalist or Film": "01c17cd8-912b-457f-b3cc-d8919ab8964b",
        "Sport or Performer": "ec91d849-02c7-4d1d-831b-ac0764ab8cc5",
        "Study, Courses, Training or Research": "2221fe46-ea42-4d2f-a332-899e60ef6fe2",
        "Work and Holiday": "f9b4e188-f90e-4f3d-bace-71c2c27d5159",
    }

    def __init__(self):
        """Prepare the browser-like HTTP headers sent with every request."""
        self.headers = {
            "Accept": "application/json, text/plain, */*",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Referer": "https://evisa.imigrasi.go.id/",
            "Origin": "https://evisa.imigrasi.go.id",
            "Accept-Language": "en-US,en;q=0.9",
            # "br" is deliberately not advertised: requests can only decode
            # Brotli bodies when an optional Brotli package is installed, so
            # offering it risks receiving a response that cannot be parsed.
            "Accept-Encoding": "gzip, deflate",
        }

    def _post_form(self, payload: Dict[str, str]) -> Any:
        """
        POST ``payload`` as form data to ``BASE_URL`` and return the decoded JSON.

        Raises whatever ``requests`` raises (connection errors, timeouts,
        HTTP error statuses via ``raise_for_status``) or the JSON decoder
        raises; callers are expected to catch and report.
        """
        response = requests.post(
            self.BASE_URL,
            data=payload,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def get_sub_activities(self, parent_activity_id: str) -> Optional[List[Dict]]:
        """
        Step 0: Get sub-activities for a parent activity

        Returns the value of the response's ``"data"`` key, or ``None`` when
        the request fails or the response is not a JSON object with that key.
        """
        payload = {
            "parent_id": parent_activity_id,
            "step": "0",
        }

        try:
            data = self._post_form(payload)
        except Exception as e:
            print(f"Error fetching sub-activities: {e}")
            return None

        if isinstance(data, dict) and "data" in data:
            return data["data"]
        return None

    def _extract_stay_and_cost_from_html(self, html_content: str) -> Dict[str, Optional[str]]:
        """
        Parse raw HTML to extract summary text for Stay and Cost.

        Every ``<strong>`` whose text contains "stay" or "cost" is treated as
        a heading; its value is the text of the block that follows the
        heading's top-level ancestor. When several headings match the same
        key, the last one wins.

        Returns:
            ``{"stay": ..., "cost": ...}``; a value is ``None`` when no
            matching heading exists and ``"Not specified"`` when the heading
            has no following block.
        """
        summary = {"stay": None, "cost": None}
        if not html_content:
            return summary

        soup = BeautifulSoup(html_content, 'html.parser')

        for strong_tag in soup.find_all('strong'):
            heading_text = strong_tag.get_text(strip=True).lower()
            if 'stay' in heading_text:
                key = 'stay'
            elif 'cost' in heading_text:
                key = 'cost'
            else:
                # Not a heading we summarise; skip the tree walk entirely.
                continue

            # Navigate up to the top-level parent block of the heading
            parent_block = strong_tag
            while parent_block.parent and parent_block.parent.name != '[document]':
                parent_block = parent_block.parent

            # The value is in the next sibling block
            next_block = parent_block.find_next_sibling()
            summary[key] = (
                next_block.get_text(separator=' ', strip=True)
                if next_block
                else "Not specified"
            )

        return summary

    def fix_html_structure(self, html_content: str) -> str:
        """
        Cleans and intelligently restructures malformed HTML from the source API.
        It groups headings with their subsequent content into single <li> elements.

        Returns an empty string for empty/``None`` input, otherwise a
        ``<ul>...</ul>`` string.
        """
        if not html_content:
            return ""

        soup = BeautifulSoup(html_content, 'html.parser')

        final_list_items = []
        current_li_buffer = []

        def is_heading(element):
            """Heuristic to determine if an element is a section heading."""
            if isinstance(element, NavigableString):
                return False
            # A heading is identified by the presence of a <strong> tag.
            return element.find('strong') is not None

        def flush_buffer():
            """Processes the buffer and adds its content as a single <li>."""
            if current_li_buffer:
                content = "".join(str(el) for el in current_li_buffer)
                final_list_items.append(f"<li>{content}</li>")
                current_li_buffer.clear()

        for element in soup.contents:
            if not str(element).strip():  # Skip empty whitespace nodes
                continue

            # A new heading closes the group collected so far.
            if is_heading(element) and current_li_buffer:
                flush_buffer()

            current_li_buffer.append(element)

        flush_buffer()  # Flush the last remaining item

        rebuilt_html = "".join(final_list_items)
        rebuilt_html = rebuilt_html.replace('</ol></li>', '</ol>')  # Clean up known stray tags
        return f"<ul>{rebuilt_html}</ul>"

    def get_visa_types(self, activity_id: str, country_id: str) -> Optional[Dict]:
        """
        Step 1: Get available visa types for activity and country

        On a ``"success"`` response every visa entry is enriched with
        ``stay_summary`` / ``cost_summary`` (one extra step-2 request per
        visa; ``"N/A"`` when details are unavailable) and the whole response
        dict is returned. An ``"empty"`` response - delivered by the API
        either as a dict or as a list such as ``["status", "empty", false]`` -
        yields ``{"status": "empty", "message": ...}``. Anything else, or any
        failure, yields ``None``.
        """
        payload = {
            "activity_id": activity_id,
            "country_id": country_id,
            "step": "1",
        }
        empty_result = {"status": "empty", "message": self._GUARANTOR_MESSAGE}

        try:
            data = self._post_form(payload)

            # Handle both dict and list responses
            if isinstance(data, dict):
                status = data.get("status")
                if status == "success":
                    # Enrich visa types with stay and cost summaries
                    visa_list = data.get("data", [])
                    for visa in visa_list:
                        details_data = self.get_visa_details(visa['id'])
                        if details_data and "visaType" in details_data and details_data["visaType"]:
                            info_html = details_data["visaType"][0].get("info", "")
                            summary = self._extract_stay_and_cost_from_html(info_html)
                            visa['stay_summary'] = summary.get('stay')
                            visa['cost_summary'] = summary.get('cost')
                        else:
                            visa['stay_summary'] = "N/A"
                            visa['cost_summary'] = "N/A"
                    data['data'] = visa_list
                    return data
                if status == "empty":
                    return empty_result
            elif isinstance(data, list):
                # API returns ["status","empty",false] for empty responses
                if len(data) >= 2 and data[1] == "empty":
                    return empty_result
            return None
        except Exception as e:
            # Only the exception is reported: the response payload may not
            # exist yet when the request itself is what failed.
            print(f"Error fetching visa types: {e}")
            return None

    def get_visa_details(self, visa_type_id: str) -> Optional[Dict]:
        """
        Step 2: Get detailed information for a specific visa type

        Returns the response's ``"data"`` value when the status is
        ``"success"``; ``None`` on any other response shape or on failure.
        """
        payload = {
            "visa_type_id": visa_type_id,
            "step": "2",
        }

        try:
            data = self._post_form(payload)
        except Exception as e:
            print(f"Error fetching visa details: {e}")
            return None

        # Non-dict bodies (the API uses lists for empty results) carry no details.
        if isinstance(data, dict) and data.get("status") == "success":
            return data.get("data")
        return None

    def get_country_id(self, country_name: str) -> Optional[str]:
        """
        Get country ID from country name

        The lookup ignores case and surrounding whitespace. Returns ``None``
        for unknown names and for non-string input.
        """
        if not isinstance(country_name, str):
            return None
        return self.COUNTRY_MAPPING.get(country_name.strip().upper())

    def get_parent_activity_id(self, activity_name: str) -> Optional[str]:
        """
        Get parent activity ID from activity name

        An exact match is tried first; failing that, the name is compared
        ignoring case and surrounding whitespace. Returns ``None`` for
        unknown names and for non-string input.
        """
        if not isinstance(activity_name, str):
            return None

        exact = self.PARENT_ACTIVITY_MAPPING.get(activity_name)
        if exact is not None:
            return exact

        wanted = activity_name.strip().casefold()
        for label, activity_id in self.PARENT_ACTIVITY_MAPPING.items():
            if label.casefold() == wanted:
                return activity_id
        return None

    def get_full_visa_info(self, country_name: str, parent_activity_name: str,
                           sub_activity_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Get complete visa information through the entire flow

        Args:
            country_name: Country name, resolved via ``get_country_id``.
            parent_activity_name: Activity label, resolved via
                ``get_parent_activity_id``.
            sub_activity_id: When given, visa types for this sub-activity
                are fetched as well.

        Returns:
            A dict with ``success``, ``country``, ``parent_activity``,
            ``data`` and ``error``. ``success`` is ``False`` (with ``error``
            set) when the country or activity is unknown or the
            sub-activities cannot be fetched. If only the visa-type lookup
            fails, ``success`` stays ``True``, ``visa_types`` stays empty and
            ``data["message"]`` explains the failure.
        """
        result = {
            "success": False,
            "country": country_name,
            "parent_activity": parent_activity_name,
            "data": None,
            "error": None,
        }

        # Get country ID
        country_id = self.get_country_id(country_name)
        if not country_id:
            result["error"] = f"Country '{country_name}' not found"
            return result

        # Get parent activity ID
        parent_activity_id = self.get_parent_activity_id(parent_activity_name)
        if not parent_activity_id:
            result["error"] = f"Activity '{parent_activity_name}' not found"
            return result

        # Step 0: Get sub-activities
        sub_activities = self.get_sub_activities(parent_activity_id)
        if not sub_activities:
            result["error"] = "Failed to fetch sub-activities"
            return result

        result["data"] = {
            "sub_activities": sub_activities,
            "visa_types": [],
            "selected_sub_activity": None,
        }

        # If sub_activity_id provided, get visa types
        if sub_activity_id:
            visa_types_data = self.get_visa_types(sub_activity_id, country_id)

            if visa_types_data:
                if visa_types_data.get("status") == "empty":
                    result["data"]["message"] = visa_types_data.get("message")
                else:
                    result["data"]["visa_types"] = visa_types_data.get("data", [])
                    result["data"]["all_visa_info"] = visa_types_data.get("all", [])
            else:
                # Surface the failure instead of silently reporting "no visas".
                result["data"]["message"] = "Failed to fetch visa types"
            result["data"]["selected_sub_activity"] = sub_activity_id

        result["success"] = True
        return result

    def get_visa_full_details(self, visa_type_id: str) -> Dict[str, Any]:
        """
        Get complete details for a specific visa type

        Returns:
            A dict with ``success``, ``data`` and ``error``. On success
            ``data`` is the first ``visaType`` entry, extended with
            ``info_html`` / ``information_html`` (restructured copies of its
            ``info`` / ``information`` fields, when present). On failure
            ``error`` always carries a message.
        """
        result = {
            "success": False,
            "data": None,
            "error": None,
        }

        details = self.get_visa_details(visa_type_id)
        if not details:
            result["error"] = "Failed to fetch visa details"
            return result

        if "visaType" not in details or not details["visaType"]:
            # Previously this path returned success=False with no explanation.
            result["error"] = "Visa details response did not contain a visa type"
            return result

        visa_info = details["visaType"][0]

        # The source HTML is malformed, so we must clean and restructure it.
        if "info" in visa_info:
            visa_info["info_html"] = self.fix_html_structure(visa_info.get("info", ""))

        if "information" in visa_info:
            visa_info["information_html"] = self.fix_html_structure(visa_info.get("information", ""))

        result["data"] = visa_info
        result["success"] = True
        return result