mrfirdauss committed on
Commit
2c76fa0
·
1 Parent(s): 47199f8

feat: merge streamlit

Browse files
Dockerfile CHANGED
@@ -30,4 +30,4 @@ EXPOSE 8501
30
 
31
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
32
 
33
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
30
 
31
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
32
 
33
+ ENTRYPOINT ["streamlit", "run", "src/Main.py", "--server.port=8501", "--server.address=0.0.0.0"]
requirements.txt CHANGED
@@ -3,4 +3,5 @@ pandas
3
  requests
4
  python-dotenv
5
  streamlit
6
- pydantic
 
 
3
  requests
4
  python-dotenv
5
  streamlit
6
+ pydantic
7
+ beautifulsoup4
src/{streamlit_app.py → Main.py} RENAMED
File without changes
src/models.py CHANGED
@@ -1,7 +1,7 @@
1
  from pydantic import BaseModel, Field
2
- from typing import Optional, List
3
  from enum import StrEnum
4
-
5
 
6
  class Status(StrEnum):
7
  RELEVANT = "RELEVANT"
@@ -23,3 +23,21 @@ class LinkNode(BaseModel):
23
  child: List[str] = Field(..., description="List of links found on this page")
24
  depth: int = Field(..., description="Depth level in the link hierarchy (0=root, 1=child of root, etc.)")
25
  raw_text: Optional[str] = None # Field to store scraped text before analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from pydantic import BaseModel, Field
2
+ from typing import Optional, List, Dict
3
  from enum import StrEnum
4
+ from dataclasses import dataclass
5
 
6
  class Status(StrEnum):
7
  RELEVANT = "RELEVANT"
 
23
  child: List[str] = Field(..., description="List of links found on this page")
24
  depth: int = Field(..., description="Depth level in the link hierarchy (0=root, 1=child of root, etc.)")
25
  raw_text: Optional[str] = None # Field to store scraped text before analysis
26
+
27
+
28
+ @dataclass
29
+ class VisaInfo:
30
+ """Data class for visa information"""
31
+ id: str
32
+ name: str
33
+ code: Optional[str] = None
34
+ description: Optional[str] = None
35
+ stay_duration: Optional[str] = None
36
+ duration_time: Optional[str] = None
37
+ is_multiple_entry: bool = False
38
+ is_arrival: bool = False
39
+ is_guarantor: bool = False
40
+ cost: Optional[str] = None
41
+ validity: Optional[str] = None
42
+ requirements: Optional[Dict] = None
43
+ detailed_info: Optional[str] = None
streamlit_visa_app.py → src/pages/eVoA_Indo.py RENAMED
@@ -1,277 +1,274 @@
1
- # streamlit_visa_app.py
2
- import streamlit as st
3
- import json
4
- from visa_scraper import IndonesianVisaScraper
5
- import pandas as pd
6
- from typing import Dict, List, Optional
7
-
8
- # Page configuration
9
- st.set_page_config(
10
- page_title="Indonesian Visa Information System",
11
- page_icon="🇮🇩",
12
- layout="wide"
13
- )
14
-
15
- # Initialize the scraper
16
- @st.cache_resource
17
- def init_scraper():
18
- return IndonesianVisaScraper()
19
-
20
- def format_visa_info(visa_data: Dict) -> str:
21
- """Format visa information for display"""
22
- info = []
23
-
24
- if "name" in visa_data:
25
- info.append(f"**Name:** {visa_data['name']}")
26
-
27
- if "code" in visa_data:
28
- info.append(f"**Code:** {visa_data['code']}")
29
-
30
- if "stay_description" in visa_data:
31
- info.append(f"**Stay Description:** {visa_data['stay_description']}")
32
-
33
- if "duration_time" in visa_data:
34
- info.append(f"**Duration:** {visa_data['duration_time']}")
35
-
36
- if "is_multiple_entry" in visa_data:
37
- entry_type = "Multiple Entry" if visa_data['is_multiple_entry'] else "Single Entry"
38
- info.append(f"**Entry Type:** {entry_type}")
39
-
40
- if "is_arrival" in visa_data:
41
- arrival = "Visa on Arrival" if visa_data['is_arrival'] else "Pre-arranged Visa"
42
- info.append(f"**Visa Type:** {arrival}")
43
-
44
- if "description" in visa_data and visa_data['description']:
45
- info.append(f"\n**Description:**\n{visa_data['description']}")
46
-
47
- return "\n\n".join(info)
48
-
49
- def main():
50
- st.title("🇮🇩 Indonesian Visa Information System")
51
- st.markdown("---")
52
-
53
- # Initialize session state
54
- if 'selected_sub_activity' not in st.session_state:
55
- st.session_state.selected_sub_activity = None
56
- if 'visa_data' not in st.session_state:
57
- st.session_state.visa_data = None
58
- if 'selected_visa_type' not in st.session_state:
59
- st.session_state.selected_visa_type = None
60
-
61
- scraper = init_scraper()
62
-
63
- # Create three columns for the main selection
64
- col1, col2, col3 = st.columns(3)
65
-
66
- with col1:
67
- # Country selection
68
- st.subheader("📍 Step 1: Select Country")
69
- countries = sorted(list(scraper.COUNTRY_MAPPING.keys()))
70
- selected_country = st.selectbox(
71
- "Choose your country:",
72
- options=countries,
73
- help="Select your country of citizenship"
74
- )
75
-
76
- with col2:
77
- # Parent Activity selection
78
- st.subheader("📋 Step 2: Select Main Purpose")
79
- activities = list(scraper.PARENT_ACTIVITY_MAPPING.keys())
80
- selected_activity = st.selectbox(
81
- "Main purpose of visit:",
82
- options=activities,
83
- help="Select the main purpose of your visit to Indonesia"
84
- )
85
-
86
- with col3:
87
- # Get sub-activities button
88
- st.subheader("🔍 Step 3: Get Sub-Activities")
89
- st.write("") # Add some spacing
90
- if st.button("Get Sub-Activities", type="primary", use_container_width=True):
91
- if selected_country and selected_activity:
92
- with st.spinner("Fetching sub-activities..."):
93
- parent_id = scraper.get_parent_activity_id(selected_activity)
94
- sub_activities = scraper.get_sub_activities(parent_id)
95
-
96
- if sub_activities:
97
- st.session_state.sub_activities = sub_activities
98
- st.success(f"Found {len(sub_activities)} sub-activities!")
99
- else:
100
- st.error("Failed to fetch sub-activities")
101
-
102
- # Display sub-activities if available
103
- if 'sub_activities' in st.session_state and st.session_state.sub_activities:
104
- st.markdown("---")
105
- st.subheader("📂 Step 4: Select Sub-Activity")
106
-
107
- # Create a dropdown for sub-activities
108
- sub_activity_options = {
109
- item['name']: item['id']
110
- for item in st.session_state.sub_activities
111
- }
112
-
113
- selected_sub_activity_name = st.selectbox(
114
- "Choose sub-activity:",
115
- options=list(sub_activity_options.keys()),
116
- help="Select the specific purpose of your visit"
117
- )
118
-
119
- if selected_sub_activity_name:
120
- st.session_state.selected_sub_activity = sub_activity_options[selected_sub_activity_name]
121
-
122
- # Get visa types button
123
- if st.button("Get Visa Types", type="primary"):
124
- with st.spinner("Fetching visa types..."):
125
- country_id = scraper.get_country_id(selected_country)
126
- visa_data = scraper.get_visa_types(
127
- st.session_state.selected_sub_activity,
128
- country_id
129
- )
130
-
131
- if visa_data:
132
- if visa_data.get("status") == "empty":
133
- st.warning(visa_data.get("message", "This type of visa must be applied by guarantor."))
134
- else:
135
- st.session_state.visa_data = visa_data
136
- st.success(f"Found {len(visa_data.get('data', []))} visa types!")
137
- else:
138
- st.error("Failed to fetch visa types")
139
-
140
- # Display visa types if available
141
- if st.session_state.visa_data and st.session_state.visa_data.get('data'):
142
- st.markdown("---")
143
- st.subheader("🎫 Step 5: Available Visa Types")
144
-
145
- # Create tabs for different visa types
146
- visa_types = st.session_state.visa_data['data']
147
-
148
- # Display visa types in a grid
149
- for i in range(0, len(visa_types), 3):
150
- cols = st.columns(3)
151
- for j in range(3):
152
- if i + j < len(visa_types):
153
- visa = visa_types[i + j]
154
- with cols[j]:
155
- with st.container():
156
- st.markdown(f"### {visa.get('name', 'N/A')}")
157
-
158
- # Display basic info
159
- if visa.get('is_arrival'):
160
- st.success("✈️ Visa on Arrival")
161
- else:
162
- st.info("📝 Pre-arranged Visa")
163
-
164
- if visa.get('is_multiple_entry'):
165
- st.info("🔄 Multiple Entry")
166
- else:
167
- st.info("➡️ Single Entry")
168
-
169
- # Display Stay and Cost summary
170
- if visa.get('stay_summary'):
171
- st.markdown(f"**Stay:** {visa['stay_summary']}")
172
- if visa.get('cost_summary'):
173
- st.markdown(f"**Cost:** {visa['cost_summary']}")
174
-
175
- # Get details button
176
- if st.button(f"Get Details", key=f"details_{visa['id']}"):
177
- with st.spinner("Fetching visa details..."):
178
- details = scraper.get_visa_full_details(visa['id'])
179
- if details['success']:
180
- st.session_state.selected_visa_type = details['data']
181
- st.rerun()
182
-
183
- # Display detailed visa information
184
- if st.session_state.selected_visa_type:
185
- st.markdown("---")
186
- st.subheader("📄 Visa Details")
187
-
188
- visa_info = st.session_state.selected_visa_type
189
-
190
- # Create two columns for detailed information
191
- detail_col1, detail_col2 = st.columns(2)
192
-
193
- with detail_col1:
194
- st.markdown("### Basic Information")
195
- st.write(f"**Name:** {visa_info.get('name', 'N/A')}")
196
- st.write(f"**Code:** {visa_info.get('code', 'N/A')}")
197
- st.write(f"**Duration:** {visa_info.get('duration_time', 'N/A')}")
198
- st.write(f"**Validity:** {visa_info.get('day_expired', 'N/A')} days")
199
-
200
- if visa_info.get('is_multiple_entry'):
201
- st.success("✅ Multiple Entry Allowed")
202
- else:
203
- st.warning("⚠️ Single Entry Only")
204
-
205
- if visa_info.get('is_arrival'):
206
- st.success("✅ Visa on Arrival Available")
207
- else:
208
- st.info("📝 Pre-arranged Visa Required")
209
-
210
- if visa_info.get('is_guarantor'):
211
- st.warning("⚠️ Guarantor Required")
212
-
213
- with detail_col2:
214
- st.markdown("### Requirements")
215
- st.write(f"**Passport Validity:** {visa_info.get('passport_value', 6)} {visa_info.get('passport_unit', 'months')}")
216
-
217
- if visa_info.get('is_verification'):
218
- st.info(" Verification Required")
219
-
220
- if visa_info.get('is_login'):
221
- st.info("🔐 Login Required for Application")
222
-
223
- # Display description
224
- if visa_info.get('description'):
225
- st.markdown("### Description")
226
- with st.expander("View Full Description"):
227
- st.write(visa_info['description'])
228
-
229
- # Display detailed HTML information if available
230
- if visa_info.get('info_html'):
231
- st.markdown("### Detailed Information")
232
- with st.expander("View Detailed Requirements and Conditions", expanded=True):
233
- st.markdown(visa_info['info_html'], unsafe_allow_html=True)
234
-
235
- # Clear button
236
- if st.button("Clear Selection", type="secondary"):
237
- st.session_state.selected_visa_type = None
238
- st.rerun()
239
-
240
- # Sidebar with additional information
241
- with st.sidebar:
242
- st.markdown("## 🔍 Quick Reference")
243
- st.markdown("""
244
- ### How to Use:
245
- 1. **Select your country** from the dropdown
246
- 2. **Choose main purpose** of your visit
247
- 3. **Get sub-activities** for your purpose
248
- 4. **Select specific activity**
249
- 5. **View available visa types**
250
- 6. **Get detailed information** for each visa
251
-
252
- ### Visa Categories:
253
- - **VOA:** Visa on Arrival
254
- - **BVK:** Visa Exemption
255
- - **Single Entry:** One-time entry
256
- - **Multiple Entry:** Multiple entries allowed
257
-
258
- ### Important Notes:
259
- - Some visas require a guarantor
260
- - Check passport validity requirements
261
- - Verify if pre-arrangement is needed
262
- """)
263
-
264
- st.markdown("---")
265
- st.markdown("### 📊 Statistics")
266
- if st.session_state.visa_data and st.session_state.visa_data.get('data'):
267
- visa_types = st.session_state.visa_data['data']
268
- st.metric("Total Visa Types", len(visa_types))
269
-
270
- voa_count = sum(1 for v in visa_types if v.get('is_arrival'))
271
- st.metric("Visa on Arrival", voa_count)
272
-
273
- multi_entry = sum(1 for v in visa_types if v.get('is_multiple_entry'))
274
- st.metric("Multiple Entry", multi_entry)
275
-
276
- if __name__ == "__main__":
277
  main()
 
1
+ import streamlit as st
2
+ from visa_scraper import IndonesianVisaScraper
3
+ from typing import Dict
4
+
5
+ # Page configuration
6
+ st.set_page_config(
7
+ page_title="Indonesian Visa Information System",
8
+ page_icon="🇮🇩",
9
+ layout="wide"
10
+ )
11
+
12
+ # Initialize the scraper
13
+ @st.cache_resource
14
+ def init_scraper():
15
+ return IndonesianVisaScraper()
16
+
17
+ def format_visa_info(visa_data: Dict) -> str:
18
+ """Format visa information for display"""
19
+ info = []
20
+
21
+ if "name" in visa_data:
22
+ info.append(f"**Name:** {visa_data['name']}")
23
+
24
+ if "code" in visa_data:
25
+ info.append(f"**Code:** {visa_data['code']}")
26
+
27
+ if "stay_description" in visa_data:
28
+ info.append(f"**Stay Description:** {visa_data['stay_description']}")
29
+
30
+ if "duration_time" in visa_data:
31
+ info.append(f"**Duration:** {visa_data['duration_time']}")
32
+
33
+ if "is_multiple_entry" in visa_data:
34
+ entry_type = "Multiple Entry" if visa_data['is_multiple_entry'] else "Single Entry"
35
+ info.append(f"**Entry Type:** {entry_type}")
36
+
37
+ if "is_arrival" in visa_data:
38
+ arrival = "Visa on Arrival" if visa_data['is_arrival'] else "Pre-arranged Visa"
39
+ info.append(f"**Visa Type:** {arrival}")
40
+
41
+ if "description" in visa_data and visa_data['description']:
42
+ info.append(f"\n**Description:**\n{visa_data['description']}")
43
+
44
+ return "\n\n".join(info)
45
+
46
+ def main():
47
+ st.title("🇮🇩 Indonesian Visa Information System")
48
+ st.markdown("---")
49
+
50
+ # Initialize session state
51
+ if 'selected_sub_activity' not in st.session_state:
52
+ st.session_state.selected_sub_activity = None
53
+ if 'visa_data' not in st.session_state:
54
+ st.session_state.visa_data = None
55
+ if 'selected_visa_type' not in st.session_state:
56
+ st.session_state.selected_visa_type = None
57
+
58
+ scraper = init_scraper()
59
+
60
+ # Create three columns for the main selection
61
+ col1, col2, col3 = st.columns(3)
62
+
63
+ with col1:
64
+ # Country selection
65
+ st.subheader("📍 Step 1: Select Country")
66
+ countries = sorted(list(scraper.COUNTRY_MAPPING.keys()))
67
+ selected_country = st.selectbox(
68
+ "Choose your country:",
69
+ options=countries,
70
+ help="Select your country of citizenship"
71
+ )
72
+
73
+ with col2:
74
+ # Parent Activity selection
75
+ st.subheader("📋 Step 2: Select Main Purpose")
76
+ activities = list(scraper.PARENT_ACTIVITY_MAPPING.keys())
77
+ selected_activity = st.selectbox(
78
+ "Main purpose of visit:",
79
+ options=activities,
80
+ help="Select the main purpose of your visit to Indonesia"
81
+ )
82
+
83
+ with col3:
84
+ # Get sub-activities button
85
+ st.subheader("🔍 Step 3: Get Sub-Activities")
86
+ st.write("") # Add some spacing
87
+ if st.button("Get Sub-Activities", type="primary", use_container_width=True):
88
+ if selected_country and selected_activity:
89
+ with st.spinner("Fetching sub-activities..."):
90
+ parent_id = scraper.get_parent_activity_id(selected_activity)
91
+ sub_activities = scraper.get_sub_activities(parent_id)
92
+
93
+ if sub_activities:
94
+ st.session_state.sub_activities = sub_activities
95
+ st.success(f"Found {len(sub_activities)} sub-activities!")
96
+ else:
97
+ st.error("Failed to fetch sub-activities")
98
+
99
+ # Display sub-activities if available
100
+ if 'sub_activities' in st.session_state and st.session_state.sub_activities:
101
+ st.markdown("---")
102
+ st.subheader("📂 Step 4: Select Sub-Activity")
103
+
104
+ # Create a dropdown for sub-activities
105
+ sub_activity_options = {
106
+ item['name']: item['id']
107
+ for item in st.session_state.sub_activities
108
+ }
109
+
110
+ selected_sub_activity_name = st.selectbox(
111
+ "Choose sub-activity:",
112
+ options=list(sub_activity_options.keys()),
113
+ help="Select the specific purpose of your visit"
114
+ )
115
+
116
+ if selected_sub_activity_name:
117
+ st.session_state.selected_sub_activity = sub_activity_options[selected_sub_activity_name]
118
+
119
+ # Get visa types button
120
+ if st.button("Get Visa Types", type="primary"):
121
+ with st.spinner("Fetching visa types..."):
122
+ country_id = scraper.get_country_id(selected_country)
123
+ visa_data = scraper.get_visa_types(
124
+ st.session_state.selected_sub_activity,
125
+ country_id
126
+ )
127
+
128
+ if visa_data:
129
+ if visa_data.get("status") == "empty":
130
+ st.warning(visa_data.get("message", "This type of visa must be applied by guarantor."))
131
+ else:
132
+ st.session_state.visa_data = visa_data
133
+ st.success(f"Found {len(visa_data.get('data', []))} visa types!")
134
+ else:
135
+ st.error("Failed to fetch visa types")
136
+
137
+ # Display visa types if available
138
+ if st.session_state.visa_data and st.session_state.visa_data.get('data'):
139
+ st.markdown("---")
140
+ st.subheader("🎫 Step 5: Available Visa Types")
141
+
142
+ # Create tabs for different visa types
143
+ visa_types = st.session_state.visa_data['data']
144
+
145
+ # Display visa types in a grid
146
+ for i in range(0, len(visa_types), 3):
147
+ cols = st.columns(3)
148
+ for j in range(3):
149
+ if i + j < len(visa_types):
150
+ visa = visa_types[i + j]
151
+ with cols[j]:
152
+ with st.container():
153
+ st.markdown(f"### {visa.get('name', 'N/A')}")
154
+
155
+ # Display basic info
156
+ if visa.get('is_arrival'):
157
+ st.success("✈️ Visa on Arrival")
158
+ else:
159
+ st.info("📝 Pre-arranged Visa")
160
+
161
+ if visa.get('is_multiple_entry'):
162
+ st.info("🔄 Multiple Entry")
163
+ else:
164
+ st.info("➡️ Single Entry")
165
+
166
+ # Display Stay and Cost summary
167
+ if visa.get('stay_summary'):
168
+ st.markdown(f"**Stay:** {visa['stay_summary']}")
169
+ if visa.get('cost_summary'):
170
+ st.markdown(f"**Cost:** {visa['cost_summary']}")
171
+
172
+ # Get details button
173
+ if st.button(f"Get Details", key=f"details_{visa['id']}"):
174
+ with st.spinner("Fetching visa details..."):
175
+ details = scraper.get_visa_full_details(visa['id'])
176
+ if details['success']:
177
+ st.session_state.selected_visa_type = details['data']
178
+ st.rerun()
179
+
180
+ # Display detailed visa information
181
+ if st.session_state.selected_visa_type:
182
+ st.markdown("---")
183
+ st.subheader("📄 Visa Details")
184
+
185
+ visa_info = st.session_state.selected_visa_type
186
+
187
+ # Create two columns for detailed information
188
+ detail_col1, detail_col2 = st.columns(2)
189
+
190
+ with detail_col1:
191
+ st.markdown("### Basic Information")
192
+ st.write(f"**Name:** {visa_info.get('name', 'N/A')}")
193
+ st.write(f"**Code:** {visa_info.get('code', 'N/A')}")
194
+ st.write(f"**Duration:** {visa_info.get('duration_time', 'N/A')}")
195
+ st.write(f"**Validity:** {visa_info.get('day_expired', 'N/A')} days")
196
+
197
+ if visa_info.get('is_multiple_entry'):
198
+ st.success("✅ Multiple Entry Allowed")
199
+ else:
200
+ st.warning("⚠️ Single Entry Only")
201
+
202
+ if visa_info.get('is_arrival'):
203
+ st.success("✅ Visa on Arrival Available")
204
+ else:
205
+ st.info("📝 Pre-arranged Visa Required")
206
+
207
+ if visa_info.get('is_guarantor'):
208
+ st.warning("⚠️ Guarantor Required")
209
+
210
+ with detail_col2:
211
+ st.markdown("### Requirements")
212
+ st.write(f"**Passport Validity:** {visa_info.get('passport_value', 6)} {visa_info.get('passport_unit', 'months')}")
213
+
214
+ if visa_info.get('is_verification'):
215
+ st.info(" Verification Required")
216
+
217
+ if visa_info.get('is_login'):
218
+ st.info("🔐 Login Required for Application")
219
+
220
+ # Display description
221
+ if visa_info.get('description'):
222
+ st.markdown("### Description")
223
+ with st.expander("View Full Description"):
224
+ st.write(visa_info['description'])
225
+
226
+ # Display detailed HTML information if available
227
+ if visa_info.get('info_html'):
228
+ st.markdown("### Detailed Information")
229
+ with st.expander("View Detailed Requirements and Conditions", expanded=True):
230
+ st.markdown(visa_info['info_html'], unsafe_allow_html=True)
231
+
232
+ # Clear button
233
+ if st.button("Clear Selection", type="secondary"):
234
+ st.session_state.selected_visa_type = None
235
+ st.rerun()
236
+
237
+ # Sidebar with additional information
238
+ with st.sidebar:
239
+ st.markdown("## 🔍 Quick Reference")
240
+ st.markdown("""
241
+ ### How to Use:
242
+ 1. **Select your country** from the dropdown
243
+ 2. **Choose main purpose** of your visit
244
+ 3. **Get sub-activities** for your purpose
245
+ 4. **Select specific activity**
246
+ 5. **View available visa types**
247
+ 6. **Get detailed information** for each visa
248
+
249
+ ### Visa Categories:
250
+ - **VOA:** Visa on Arrival
251
+ - **BVK:** Visa Exemption
252
+ - **Single Entry:** One-time entry
253
+ - **Multiple Entry:** Multiple entries allowed
254
+
255
+ ### Important Notes:
256
+ - Some visas require a guarantor
257
+ - Check passport validity requirements
258
+ - Verify if pre-arrangement is needed
259
+ """)
260
+
261
+ st.markdown("---")
262
+ st.markdown("### 📊 Statistics")
263
+ if st.session_state.visa_data and st.session_state.visa_data.get('data'):
264
+ visa_types = st.session_state.visa_data['data']
265
+ st.metric("Total Visa Types", len(visa_types))
266
+
267
+ voa_count = sum(1 for v in visa_types if v.get('is_arrival'))
268
+ st.metric("Visa on Arrival", voa_count)
269
+
270
+ multi_entry = sum(1 for v in visa_types if v.get('is_multiple_entry'))
271
+ st.metric("Multiple Entry", multi_entry)
272
+
273
+ if __name__ == "__main__":
 
 
 
274
  main()
src/visa_scraper.py ADDED
@@ -0,0 +1,488 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # visa_scraper.py
2
+ import requests
3
+ from typing import Dict, List, Optional, Any
4
+ from bs4 import BeautifulSoup, NavigableString
5
+
6
+
7
+
8
+ class IndonesianVisaScraper:
9
+ """
10
+ Scraper for Indonesian visa information from evisa.imigrasi.go.id
11
+ """
12
+
13
+ BASE_URL = "https://evisa.imigrasi.go.id/web/visa-selection/data"
14
+
15
+ # Country ID mapping
16
+ COUNTRY_MAPPING = {
17
+ "ALBANIA": "df26b6b5-b957-44fc-8775-5a307aff676c",
18
+ "ALGERIA": "ee9a47a3-c229-4384-a00e-c5d3132a2b6a",
19
+ "AMERICAN SAMOA": "f57b1c29-b107-402d-a33c-2198d61dee4a",
20
+ "ANDORRA": "7fc48e74-1a09-4edd-9bca-d6622ea82b1e",
21
+ "ANGOLA": "2cd8b0e4-f9a4-44f5-8014-23f1f03d3193",
22
+ "ANGUILLA": "7d9c632d-106a-40b5-b4de-d46572bdb18e",
23
+ "ANTIGUA AND BARBUDA": "dab42168-cd9e-4e6f-86ec-a1a773d34684",
24
+ "ARGENTINA": "7a0eaa53-353b-4206-9299-1badcdb0fdf1",
25
+ "ARMENIA": "e7bcd2e1-fb4b-4748-8fce-b2d0e3b3827c",
26
+ "AUSTRALIA": "46283cb1-d406-47c1-86fd-8c308ffa173a",
27
+ "AUSTRIA": "727e690b-fc21-4b8a-b17f-1de1616433b7",
28
+ "AZERBAIJAN": "a11144b1-8228-42d5-8242-a0f529c0de10",
29
+ "BAHAMAS": "0685586c-7e37-4aa1-8020-f4aa453e9472",
30
+ "BAHRAIN": "f3373174-008f-4f5c-b902-e79763719c64",
31
+ "BANGLADESH": "a14de768-1e37-4d93-8def-352266de349a",
32
+ "BARBADOS": "02054c21-3f67-4fe3-95bc-d20106985e49",
33
+ "BELARUS": "9ff91215-8e06-45b4-8f88-f7685d249358",
34
+ "BELGIUM": "94629b00-f4c4-46bd-b263-cc7c53f8b9d9",
35
+ "BELIZE": "ada414e8-6fd5-4b69-82eb-89472e20d3be",
36
+ "BENIN": "88a03b34-1b9e-4bd6-ba7b-2a7bd8838c24",
37
+ "BERMUDA": "50425995-e990-47c5-8e5e-882840ef2b7a",
38
+ "BHUTAN": "4528297b-9315-45f8-adf1-3a5f0b7e994f",
39
+ "BOLIVIA": "bdc856ae-6948-4381-9648-8fc41bef7577",
40
+ "BOSNIA AND HERZEGOVINA": "fa943b74-22e9-4650-bccb-14b466bde722",
41
+ "BOTSWANA": "f98613a2-3816-4af6-8d27-e7edc2e67366",
42
+ "BRAZIL": "cf4dfdbe-327b-4c9a-bd39-441a338cd276",
43
+ "BRUNEI DARUSSALAM": "687653fc-236b-4bc8-baf8-88a81f17618a",
44
+ "BULGARIA": "7345359f-4f82-4089-a153-acf32254b0a3",
45
+ "BURKINA FASO": "83c3ceed-af1c-4b06-bb4a-0217712043c8",
46
+ "BURUNDI": "ff78c621-fa7f-467b-812c-9912e88ac430",
47
+ "CAMBODIA": "3f387ed5-13e6-42b8-ba77-84f281d995f7",
48
+ "CAMEROON": "1d91caf5-bcf2-4e4b-8c77-7d62781bf220",
49
+ "CANADA": "47db151f-ce3e-406d-b707-8fcdfd088a61",
50
+ "CAPE VERDE": "3ecf1c2c-de95-4811-9e5e-4b03816a108a",
51
+ "CENTRAL AFRICAN REPUBLIC": "0c72f1ee-2e31-4713-86ac-e079c0f994e7",
52
+ "CHAD": "abbd5561-fde5-42e5-8237-382ab808ec8f",
53
+ "CHILE": "0c597abe-9cf2-4171-8449-06bcc928afd8",
54
+ "CHINA": "883891b6-1c3f-41c3-9279-4745f63575cb",
55
+ "COLOMBIA": "76578a13-3e64-41da-8d91-deef561311c2",
56
+ "COMOROS": "e350c59c-5236-4207-8ef9-88e4aebeda9d",
57
+ "CONGO REPUBLIC": "41d426c7-b47f-4e73-a810-dc78bcaae82f",
58
+ "COSTA RICA": "d16f1de8-e959-4569-89ab-0e165456eeb0",
59
+ "CROATIA": "a93f74b3-fcc6-473c-89f9-0e7af7d7f6b6",
60
+ "CUBA": "4f6e354a-ca46-49f3-b035-8188b2d1f6b6",
61
+ "CYPRUS": "8e5508a3-0173-4be4-bdc8-23986b9179d7",
62
+ "CZECH REPUBLIC": "318736c7-f14f-4560-a65a-8c2304ea6805",
63
+ "DEMOCRATIC REPUBLIC OF CONGO": "7d5d09ba-f7a2-4780-a730-ac5749aeae88",
64
+ "DENMARK": "25d567a3-7a4e-4248-9800-647028da9b71",
65
+ "DJIBOUTI": "79c7ec19-d69c-48f4-9cbc-96110e24667d",
66
+ "DOMINICA": "ac9f23a8-fd8e-4877-bab8-367ce26cbb06",
67
+ "DOMINICAN REPUBLIC": "0df2afd4-1945-40c3-a3bb-bcc1fa8ab081",
68
+ "ECUADOR": "c7339e3d-cbe9-4893-af6f-0bdc4b6ebd26",
69
+ "EGYPT": "29a5479e-3e9a-4fe6-a4b2-705de1654fb7",
70
+ "EL SALVADOR": "c20c9d1a-3d42-48c1-8033-12a837058ba6",
71
+ "EQUATORIAL GUINEA": "70003abd-8f1e-4baa-b513-a406405c85bd",
72
+ "ERITREA": "e9ab2043-0c97-4c52-b103-8118861898d0",
73
+ "ESTONIA": "448fa696-e0ca-4e5f-89cd-671f04d48c9f",
74
+ "ESWATINI": "033731b5-112f-49bc-a182-7b1429049dbf",
75
+ "ETHIOPIA": "a0a10b9e-3157-4d65-84c3-e96bc8a99979",
76
+ "FIJI": "67f91d7d-5166-44d3-b050-45fe4fa6bad7",
77
+ "FINLAND": "52006b57-7b02-4902-b55d-0dadc577b75c",
78
+ "FRANCE": "23e8665d-2f2c-4841-acf6-6a2cac358ed4",
79
+ "GABON": "38381f24-d8fb-42e4-ba80-f0b1451e5577",
80
+ "GAMBIA": "41d04ba0-6764-4b55-88ed-e1f776743128",
81
+ "GEORGIA": "f851c74a-b9dc-4d9d-a99c-65b6a69afe95",
82
+ "GERMANY": "2d7c66c1-9341-459c-83a8-8138766b133c",
83
+ "GHANA": "012e51a0-4e9c-4dfd-8438-b4cc375fb0ed",
84
+ "GREECE": "47aa18d9-ed29-4250-9206-6f0cdaec94ff",
85
+ "GRENADA": "9d14a529-4116-449f-818f-538994ae14c8",
86
+ "GUATEMALA": "0159ccc8-a34c-4a6d-846f-510296a43536",
87
+ "GUINEA": "5cd3306f-e3de-4031-b1f4-fb5d07e4de70",
88
+ "GUINEA-BISSAU": "e04e1e50-e2a8-4b11-97c3-f0c8d759af9a",
89
+ "GUYANA": "77774317-99e1-43c3-ada2-62aaf1a22f81",
90
+ "HAITI": "91d309f2-de3f-40bb-80e7-4b68077fdc33",
91
+ "HONDURAS": "1fee433d-5093-4752-bd18-019be82ebcae",
92
+ "HONG KONG SAR": "2ecd2c23-33ff-4624-80d3-900777e7801e",
93
+ "HUNGARY": "5d2f2361-562a-4f59-84f7-a8dabb127add",
94
+ "ICELAND": "e6ff784e-9de7-44a8-9f67-2c3beecd2efc",
95
+ "INDIA": "f51f4255-1b0b-4a1b-be01-4bc82909c33e",
96
+ "INDONESIA": "66c0c992-6f20-4464-afa0-160e48985792", # NOTE: same ID as "IRAN" below; one of the two is likely a copy error, verify against the upstream country list
97
+ "IRAN": "66c0c992-6f20-4464-afa0-160e48985792",
98
+ "IRAQ": "d106dd98-25c4-489d-ae37-a73bdf0b4b7d",
99
+ "IRELAND": "43f85fba-b363-4c2e-aa27-83fb1a9da94c",
100
+ "ITALY": "a24388f2-c0a5-4807-894c-de6823a79a3f",
101
+ "IVORY COAST": "7dc9c73c-4c60-4ebc-bd41-13010e8c2952",
102
+ "JAMAICA": "652d4cc5-c90a-44d0-866e-94167c3dbd3b",
103
+ "JAPAN": "1beaa902-fae3-4733-984d-437375211a3d",
104
+ "JORDAN": "4c108286-8eb9-48dc-9590-4a0a3cfe2b40",
105
+ "KAZAKHSTAN": "4f68af57-24fc-4da3-9524-5a344f7ade7d",
106
+ "KENYA": "009337db-a9ad-43af-b823-c95aca6798b9",
107
+ "KUWAIT": "6d8dc2ec-dbc0-4416-a3e1-553935ee4f23",
108
+ "KYRGYZSTAN": "6214e7cd-b344-4698-ae88-d32b80cc26f3",
109
+ "LAOS": "50ebf46b-2f8e-4345-bc0c-461857b06847",
110
+ "LATVIA": "af83f9ed-713c-4ae6-a588-c3c79fe58d44",
111
+ "LEBANON": "477fc793-7ad7-4955-af53-09d9ffd197b8",
112
+ "LESOTHO": "ceef5a50-b212-4f03-b132-1db4fa3ee8d9",
113
+ "LIBYA": "a6f469cc-61ca-4640-a200-350e33630a7a",
114
+ "LIECHTENSTEIN": "97dc8df9-6382-47c4-abcb-af92e0833ab3",
115
+ "LITHUANIA": "f0127376-b8cc-49b6-ba94-90137d5c9912",
116
+ "LUXEMBOURG": "b2ac4801-ca60-4fd1-9a09-9de023939a17",
117
+ "MACAO SAR": "63ad0853-32f7-444a-94d3-1db1515f6411",
118
+ "MACEDONIA": "ca2e8749-8c9c-48ea-8dbe-1d3733ffaf70",
119
+ "MADAGASCAR": "22f9fd8e-f208-49c9-8afd-968066319f12",
120
+ "MALAWI": "d2a7beda-5424-4d51-ae10-f37ca2f470c5",
121
+ "MALAYSIA": "18ea95db-04a0-4564-a871-2d89bf9b6bd5",
122
+ "MALDIVES": "96304e76-87c2-4ea2-979d-d1dd0a3a8e09",
123
+ "MALI": "5e7032d6-4a29-412a-b1f0-6a88556862be",
124
+ "MALTA": "71757f61-a945-4ab0-bccd-3eba9e29ce9b",
125
+ "MAURITANIA": "b5ec75db-b92d-4b7e-ad2b-d97d132d00cb",
126
+ "MAURITIUS": "a8c9190a-f11a-42f6-914e-32907a9b8c50",
127
+ "MEXICO": "89b96bc1-e4ee-433c-953a-d5c5316ad3d8",
128
+ "MOLDOVA": "e8548c39-a894-4283-8792-1c54b601bd13",
129
+ "MONACO": "c17c385d-793f-42bf-be59-dd910d88e345",
130
+ "MONGOLIA": "c9464187-3364-4455-9637-a4801beb1293",
131
+ "MONTENEGRO": "b12556a9-7ee3-43b8-8630-7b99643d2419",
132
+ "MOROCCO": "ae28d967-9143-4a3c-88c9-32ecda8a5f14",
133
+ "MOZAMBIQUE": "aa50af6e-ffda-421a-8b07-bbd8376ffcbe",
134
+ "MYANMAR": "63419316-d037-4edc-ab35-d3bd8da1487c",
135
+ "NAMIBIA": "bb48769c-3071-42bb-813e-645f18b1ff0b",
136
+ "NEPAL": "8da9858f-bdf2-4b6b-9429-238ebec039dd",
137
+ "NETHERLANDS": "220ac96a-b9c9-4f77-8711-677004adaee2",
138
+ "NEW ZEALAND": "ef268b73-8b9e-478f-a459-32058e33b1f3",
139
+ "NICARAGUA": "6206cd29-800f-46b1-8a8d-a8cd70123281",
140
+ "NIGER": "edc16d7a-71ac-45ac-8e22-315f2ec89d96",
141
+ "NIGERIA": "f565a323-3bae-4dcb-b32e-95c6f1819c0f",
142
+ "NORTH MACEDONIA": "f565a323-3bae-4dcb-b32e-95c6f1819c0f",
143
+ "NORWAY": "f322b0c9-61a5-4ee8-9cdb-f046a5245e6b",
144
+ "OMAN": "6aa75c1b-e7eb-43f8-88d7-1f0ad5764b43",
145
+ "PAKISTAN": "d6adb2da-a3d0-4143-bcc5-1651b0671851",
146
+ "PALAU": "90782348-c645-4400-84cb-2c4307e2d374",
147
+ "PALESTINE": "e7b5ec63-4203-4668-a377-dd2b526682df",
148
+ "PANAMA": "ec76dc7f-faca-4bfd-b76d-3de68e0cf280",
149
+ "PAPUA NEW GUINEA": "275b19e4-50a1-41c7-b6d8-cfbbdb03d046",
150
+ "PARAGUAY": "e58d82e1-a65c-410a-ac8a-866c6f25da9f",
151
+ "PERU": "e5f942ed-23fd-4f2d-a5d4-97399753ec08",
152
+ "PHILIPPINES": "8db55c27-d5c2-4cf7-bbaa-6db35fe3863d",
153
+ "POLAND": "733b9eb2-8fea-49ab-9b7d-ba9d56ac0108",
154
+ "PORTUGAL": "2fe14725-2c04-4c6a-963c-dcde840dcfd5",
155
+ "QATAR": "ff4c5ef0-b98c-4316-93a1-401d0e503d23",
156
+ "REPUBLIC OF KOREA": "0e63775f-7370-4203-9ff4-8a7ce1b83da5",
157
+ "REPUBLIC OF SOUTH AFRICA": "89ee80a3-b660-44cc-b09d-9e8b02df327c",
158
+ "ROMANIA": "cd630114-9710-49a8-a450-9eff8b9f7966",
159
+ "RUSSIA": "c9927f33-3778-4955-a5ed-5be3fe3c2c86",
160
+ "RWANDA": "fb80a5a2-7da4-4684-9a08-ac9935a7687c",
161
+ "SAINT KITTS AND NEVIS": "652bdc10-1147-4da7-b87d-0a2d87f456ff",
162
+ "SAINT LUCIA": "b72c2f44-6a70-44cc-92c9-e14f791ed3a0",
163
+ "SAINT VINCENT AND GRENADINES": "57c54eb4-4317-4873-b20f-9eca9db38461",
164
+ "SAMOA": "1ff2297d-ca8a-41d0-8ed3-835773306c91",
165
+ "SAN MARINO": "1bd460aa-a6f7-4517-b557-68c9a5c4b575",
166
+ "SAO TOME AND PRINCIPE": "a6945046-ca8d-4df0-9d85-76605233e7cd",
167
+ "SAUDI ARABIA": "36cb6d47-cfd4-42a0-ac3d-398d76f932c3",
168
+ "SENEGAL": "02b4584f-7c26-4b8f-8d07-7d79552045a7",
169
+ "SERBIA": "77514d20-e823-4134-9be1-663279285570",
170
+ "SEYCHELLES": "9a5bf6eb-bdd3-46ce-9298-325ee83c6e8a",
171
+ "SIERRA LEONE": "536c5bbc-9d92-4831-856f-0b4183866560",
172
+ "SINGAPORE": "76f0332d-ff00-4e69-acda-73d153c6ce66",
173
+ "SLOVAKIA": "2c48b68b-5184-4d9f-9635-b4376855f832",
174
+ "SLOVENIA": "95c29f1b-464b-4985-b5f2-9589e977cd65",
175
+ "SOLOMON ISLANDS": "a6cc707d-cb28-4fff-aca0-8d7cb53e003b",
176
+ "SOUTH SUDAN": "4a72d20d-8764-4f24-a6c5-5733637e0947",
177
+ "SPAIN": "a7391e9c-7a1f-4184-bb40-b8d82b001643",
178
+ "SRI LANKA": "c0d856c9-bac0-4566-aeb6-db30d16b611c",
179
+ "SUDAN": "f5d95a4d-2857-44fe-a124-1c041aab463a",
180
+ "SURINAME": "0697ba08-aea2-40cb-b54a-4dddf66cc568",
181
+ "SWEDEN": "5285ce98-204f-42c7-9a8b-0ce793992540",
182
+ "SWITZERLAND": "4b8a01ac-ebac-4157-ba96-52a42452b8fc",
183
+ "SYRIA": "a4c2a6b9-599e-4f32-a2cb-a7a8c419d3fd",
184
+ "TAIWAN": "cab7f051-6e1e-40e8-bc1d-304c7d718205",
185
+ "TAJIKISTAN": "4e06dd54-cb40-4689-ba37-d52ee18b6232",
186
+ "TANZANIA": "781d5b7f-8d72-4ab6-bb85-9d84c7002655",
187
+ "THAILAND": "568938de-3757-4038-ab10-9515de53dcde",
188
+ "TIMOR LESTE": "92991bf5-3eb4-4bcd-a9cd-eb3791f61855",
189
+ "TOGO": "4472c324-c691-41ef-993f-1fdc68bdaeb4",
190
+ "TONGA": "5da97514-d2f2-49e2-8b3c-c2cd837076c9",
191
+ "TRINIDAD AND TOBAGO": "7052f57f-ed0f-4af7-a2bd-c56add55c58b",
192
+ "TUNISIA": "d43d13b8-c1f6-4b99-96fc-46f8566cf2f8",
193
+ "TURKEY": "ef010e72-e3d1-47af-81eb-c801725dfd56",
194
+ "TURKMENISTAN": "d86a010c-3d78-4e3c-a0a2-a8c28ba2faa5",
195
+ "UGANDA": "77291ff1-eab3-4a48-b4f8-a554edb33c54",
196
+ "UKRAINE": "e01dfda0-5841-4395-8073-995bac33530c",
197
+ "UNITED ARAB EMIRATES": "97ed438c-9faf-4f9c-a407-72e3718ca022",
198
+ "UNITED KINGDOM": "94ce109e-b072-4229-9285-cf9cacc8fb2d",
199
+ "UNITED STATES OF AMERICA": "9b23d98e-5f93-47c0-bc86-7c01185dc7a7",
200
+ "USA": "9b23d98e-5f93-47c0-bc86-7c01185dc7a7", # Alias
201
+ "URUGUAY": "2b5383fd-857a-4d13-9da2-6cc8817d94e4",
202
+ "UZBEKISTAN": "78743012-4d5f-499c-b1f7-87a30df735f9",
203
+ "VANUATU": "402de03b-eba7-4fca-ab14-5a1c8efeeec7",
204
+ "VENEZUELA": "41b2e501-26aa-4787-97fa-56af1e94b82c",
205
+ "VIETNAM": "e3838f92-2f1d-449c-b6b8-76b089027f0a",
206
+ "YEMEN": "e8324fd1-9cee-4772-97af-651408c0bab9",
207
+ "ZAMBIA": "16543ea3-11bd-4551-9d9e-44abbec77c25",
208
+ "ZIMBABWE": "e0ea4fa1-498e-4d95-89eb-24415907dcab"
209
+ }
210
+
211
+ # Parent Activity mapping (Step 0): activity display name -> parent activity ID, looked up by get_parent_activity_id()
212
+ PARENT_ACTIVITY_MAPPING = {
213
+ "General, Family, or Social": "d5bc2168-2f4a-4396-8eae-3d895a0508e9",
214
+ "Investment, Business, or Government": "f7a8ac1d-a71f-45d3-919f-985e295533f2",
215
+ "Golden Visa": "63350ecf-72a1-4fd1-8674-d42f815615fd",
216
+ "Diaspora": "58d2cbb5-423a-4f8e-8e3c-bcddd9f7980e",
217
+ "Silver Hair & Retirement": "ad33081b-7c1d-4c07-a7ed-aa36f0b54bb3",
218
+ "Second Home": "1bb683e9-bb81-4a85-9651-ba4d4174ff0e",
219
+ "Professional or Employment": "40d4fdc7-4117-48ff-9ed6-3950088fc760",
220
+ "Journalist or Film": "01c17cd8-912b-457f-b3cc-d8919ab8964b",
221
+ "Sport or Performer": "ec91d849-02c7-4d1d-831b-ac0764ab8cc5",
222
+ "Study, Courses, Training or Research": "2221fe46-ea42-4d2f-a332-899e60ef6fe2",
223
+ "Work and Holiday": "f9b4e188-f90e-4f3d-bace-71c2c27d5159"
224
+ }
225
+
226
def __init__(self):
    """Prepare the HTTP headers that the request methods send to the API.

    The headers present a browser-like User-Agent together with the
    Referer and Origin of https://evisa.imigrasi.go.id.
    """
    self.headers = {
        "Accept": "application/json, text/plain, */*",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Referer": "https://evisa.imigrasi.go.id/",
        "Origin": "https://evisa.imigrasi.go.id",
        "Accept-Language": "en-US,en;q=0.9",
        # "br" is deliberately not advertised: requests only decodes Brotli
        # when an optional brotli package is installed, so a Brotli-compressed
        # reply could otherwise reach response.json() still compressed.
        "Accept-Encoding": "gzip, deflate",
    }
235
+
236
def get_sub_activities(self, parent_activity_id: str) -> Optional[List[Dict]]:
    """Step 0: fetch the sub-activities of a parent activity.

    POSTs ``parent_id`` and ``step="0"`` as form data to ``self.BASE_URL``
    using ``self.headers``.

    Args:
        parent_activity_id: ID of the parent activity to expand.

    Returns:
        The value stored under the ``"data"`` key of the JSON response, or
        ``None`` when that key is missing, the response is not a JSON
        object, or the request fails. Failures are printed, never raised.
    """
    payload = {
        "parent_id": parent_activity_id,
        "step": "0",
    }

    try:
        # Bound the call so an unresponsive server cannot block the caller
        # forever; a timeout surfaces as an exception handled below.
        response = requests.post(
            self.BASE_URL, data=payload, headers=self.headers, timeout=30
        )
        response.raise_for_status()
        data = response.json()
    except Exception as e:
        print(f"Error fetching sub-activities: {e}")
        return None

    # Only a JSON object can carry a "data" member; anything else is "no data".
    if isinstance(data, dict):
        return data.get("data")
    return None
256
+
257
def _extract_stay_and_cost_from_html(self, html_content: str) -> Dict[str, Optional[str]]:
    """Extract the "stay" and "cost" summary texts from a visa info HTML fragment.

    Every ``<strong>`` tag is treated as a candidate heading. When its
    lower-cased text contains ``"stay"`` (checked first) or ``"cost"``, the
    text of the element following the heading's top-level ancestor is
    stored as the value. If several headings match the same key, the last
    one wins.

    Args:
        html_content: Raw HTML fragment; may be empty or ``None``.

    Returns:
        A dict with the keys ``"stay"`` and ``"cost"``. A value is ``None``
        when no matching heading exists, and ``"Not specified"`` when a
        heading exists but nothing follows it.
    """
    summary: Dict[str, Optional[str]] = {"stay": None, "cost": None}
    if not html_content:
        return summary

    soup = BeautifulSoup(html_content, 'html.parser')

    for strong_tag in soup.find_all('strong'):
        heading_text = strong_tag.get_text(strip=True).lower()

        # Decide first whether this heading matters, so the tree walk below
        # is only done for headings that actually produce a value.
        if 'stay' in heading_text:
            key = 'stay'
        elif 'cost' in heading_text:
            key = 'cost'
        else:
            continue

        # Climb to the heading's top-level ancestor (direct child of the document).
        top_block = strong_tag
        while top_block.parent and top_block.parent.name != '[document]':
            top_block = top_block.parent

        # The value lives in the next sibling element of that top-level block.
        next_block = top_block.find_next_sibling()
        summary[key] = (
            next_block.get_text(separator=' ', strip=True) if next_block else "Not specified"
        )

    return summary
286
+
287
def fix_html_structure(self, html_content: str) -> str:
    """Rebuild the API's malformed HTML as a single ``<ul>`` of ``<li>`` sections.

    The top-level nodes of the parsed fragment are walked in order. A node
    that is a tag containing a ``<strong>`` element starts a new section;
    every other non-blank node is appended to the section currently being
    collected. Each section becomes one ``<li>``.

    Args:
        html_content: Raw HTML fragment; may be empty or ``None``.

    Returns:
        ``""`` for empty input, otherwise the sections wrapped in ``<ul>``.
    """
    if not html_content:
        return ""

    soup = BeautifulSoup(html_content, 'html.parser')

    sections = []  # completed groups of nodes, one group per <li>
    pending = []   # nodes collected for the section being built

    for node in soup.contents:
        # Whitespace-only nodes carry no content.
        if not str(node).strip():
            continue

        # Plain text never opens a section; a tag does when it holds a <strong>.
        opens_section = (
            not isinstance(node, NavigableString) and node.find('strong') is not None
        )
        if opens_section and pending:
            sections.append(pending)
            pending = []

        pending.append(node)

    # Keep whatever was collected after the last heading.
    if pending:
        sections.append(pending)

    rebuilt_html = "".join(
        "<li>" + "".join(str(node) for node in section) + "</li>"
        for section in sections
    )
    # Same clean-up of the known stray tag sequence as before.
    rebuilt_html = rebuilt_html.replace('</ol></li>', '</ol>')
    return f"<ul>{rebuilt_html}</ul>"
328
+
329
def get_visa_types(self, activity_id: str, country_id: str) -> Optional[Dict]:
    """Step 1: fetch the visa types available for an activity and a country.

    POSTs ``activity_id``, ``country_id`` and ``step="1"`` as form data to
    ``self.BASE_URL``. On a ``"success"`` response every visa entry is
    enriched with ``stay_summary`` and ``cost_summary``, taken from the
    ``info`` HTML of ``self.get_visa_details(visa['id'])``. This costs one
    extra request per visa entry. Entries whose details cannot be fetched
    get ``"N/A"`` for both fields.

    Args:
        activity_id: ID of the selected (sub-)activity.
        country_id: ID of the applicant's country.

    Returns:
        * the response dict (with enriched ``"data"``) when the status is
          ``"success"``;
        * ``{"status": "empty", "message": ...}`` when the API reports an
          empty result, either as a dict or as a list whose second item is
          ``"empty"``;
        * ``None`` for any other payload or when an error occurs. Errors
          are printed, never raised.
    """
    payload = {
        "activity_id": activity_id,
        "country_id": country_id,
        "step": "1",
    }

    # Bound before the try block: the error message below formats `data`,
    # which would otherwise be undefined when the request itself fails.
    data = None
    try:
        # Bound the call so an unresponsive server cannot block the caller forever.
        response = requests.post(
            self.BASE_URL, data=payload, headers=self.headers, timeout=30
        )
        response.raise_for_status()
        data = response.json()

        # Handle both dict and list responses
        if isinstance(data, dict):
            status = data.get("status")
            if status == "success":
                # Enrich visa types with stay and cost summaries
                visa_list = data.get("data", [])
                for visa in visa_list:
                    details_data = self.get_visa_details(visa['id'])
                    visa_type_entries = (
                        details_data.get("visaType")
                        if isinstance(details_data, dict)
                        else None
                    )
                    if visa_type_entries:
                        info_html = visa_type_entries[0].get("info", "")
                        summary = self._extract_stay_and_cost_from_html(info_html)
                        visa['stay_summary'] = summary.get('stay')
                        visa['cost_summary'] = summary.get('cost')
                    else:
                        visa['stay_summary'] = "N/A"
                        visa['cost_summary'] = "N/A"
                data['data'] = visa_list
                return data
            if status == "empty":
                return {"status": "empty", "message": "This type of visa must be applied by guarantor."}
        elif isinstance(data, list):
            # API returns ["status","empty",false] for empty responses
            if len(data) >= 2 and data[1] == "empty":
                return {"status": "empty", "message": "This type of visa must be applied by guarantor."}
        return None
    except Exception as e:
        print(f"Error fetching visa types: {e} {data}")
        return None
372
+
373
def get_visa_details(self, visa_type_id: str) -> Optional[Dict]:
    """Step 2: fetch the detailed information for one visa type.

    POSTs ``visa_type_id`` and ``step="2"`` as form data to
    ``self.BASE_URL`` using ``self.headers``.

    Args:
        visa_type_id: ID of the visa type to look up.

    Returns:
        The ``"data"`` member of the response when its status is
        ``"success"``; ``None`` for any other payload or when the request
        fails. Failures are printed, never raised.
    """
    payload = {
        "visa_type_id": visa_type_id,
        "step": "2",
    }

    try:
        # Bound the call so an unresponsive server cannot block the caller forever.
        response = requests.post(
            self.BASE_URL, data=payload, headers=self.headers, timeout=30
        )
        response.raise_for_status()
        data = response.json()
    except Exception as e:
        print(f"Error fetching visa details: {e}")
        return None

    # A non-object payload (e.g. a JSON list) cannot carry a status.
    if isinstance(data, dict) and data.get("status") == "success":
        return data.get("data")
    return None
393
+
394
+
395
def get_country_id(self, country_name: str) -> Optional[str]:
    """Look up the country ID for a country name.

    The lookup ignores case and surrounding whitespace, because the keys of
    ``COUNTRY_MAPPING`` are upper-case names.

    Args:
        country_name: Country name such as ``"Indonesia"`` or ``"usa"``.

    Returns:
        The mapped ID, or ``None`` when the name is empty, ``None`` or not
        present in ``COUNTRY_MAPPING``.
    """
    if not country_name:
        return None
    return self.COUNTRY_MAPPING.get(country_name.strip().upper())
401
+
402
def get_parent_activity_id(self, activity_name: str) -> Optional[str]:
    """Return the parent activity ID registered for ``activity_name``.

    The name must match a key of ``PARENT_ACTIVITY_MAPPING`` exactly;
    ``None`` is returned when it does not.
    """
    known_activities = self.PARENT_ACTIVITY_MAPPING
    if activity_name in known_activities:
        return known_activities[activity_name]
    return None
407
+
408
+ def get_full_visa_info(self, country_name: str, parent_activity_name: str,
408
+ sub_activity_id: Optional[str] = None) -> Dict[str, Any]:
409
+ """
410
+ Get complete visa information through the entire flow: resolve the country and parent activity IDs, fetch the sub-activities (step 0) and, when sub_activity_id is given, the visa types (step 1). Returns a dict with the keys success, country, parent_activity, data and error.
411
+ """
412
+ result = {
413
+ "success": False,
414
+ "country": country_name,
415
+ "parent_activity": parent_activity_name,
416
+ "data": None,
417
+ "error": None
418
+ }
419
+
420
+ # Get country ID (an unknown country ends the flow with an error message)
421
+ country_id = self.get_country_id(country_name)
422
+ if not country_id:
423
+ result["error"] = f"Country '{country_name}' not found"
424
+ return result
425
+
426
+ # Get parent activity ID (an unknown activity ends the flow with an error message)
427
+ parent_activity_id = self.get_parent_activity_id(parent_activity_name)
428
+ if not parent_activity_id:
429
+ result["error"] = f"Activity '{parent_activity_name}' not found"
430
+ return result
431
+
432
+ # Step 0: Get sub-activities (a falsy result, i.e. None or an empty list, is reported as a failure)
433
+ sub_activities = self.get_sub_activities(parent_activity_id)
434
+ if not sub_activities:
435
+ result["error"] = "Failed to fetch sub-activities"
436
+ return result
437
+
438
+ result["data"] = {
439
+ "sub_activities": sub_activities,
440
+ "visa_types": [],
441
+ "selected_sub_activity": None
442
+ }
443
+
444
+ # If sub_activity_id provided, get visa types. NOTE(review): a falsy get_visa_types() result is not recorded in result["error"]; confirm that is intended.
445
+ if sub_activity_id:
446
+ visa_types_data = self.get_visa_types(sub_activity_id, country_id)
447
+
448
+ if visa_types_data:
449
+ if visa_types_data.get("status") == "empty":
450
+ result["data"]["message"] = visa_types_data.get("message")
451
+ else:
452
+ result["data"]["visa_types"] = visa_types_data.get("data", [])
453
+ result["data"]["all_visa_info"] = visa_types_data.get("all", [])
454
+ result["data"]["selected_sub_activity"] = sub_activity_id
455
+
456
+ result["success"] = True
457
+ return result
459
+
460
def get_visa_full_details(self, visa_type_id: str) -> Dict[str, Any]:
    """Fetch one visa type's details and attach cleaned-up HTML to them.

    Calls ``self.get_visa_details`` and takes the first entry of its
    ``"visaType"`` list. When that entry has an ``"info"`` or
    ``"information"`` field, the restructured HTML produced by
    ``self.fix_html_structure`` is stored next to it as ``"info_html"`` or
    ``"information_html"``. The entry is modified in place.

    Args:
        visa_type_id: ID of the visa type to look up.

    Returns:
        A dict with the keys ``success``, ``data`` and ``error``. On
        success ``data`` is the enriched visa entry and ``error`` is
        ``None``; on every failure ``success`` is ``False``, ``data`` is
        ``None`` and ``error`` holds a message.
    """
    result = {
        "success": False,
        "data": None,
        "error": None,
    }

    details = self.get_visa_details(visa_type_id)
    if not details:
        result["error"] = "Failed to fetch visa details"
        return result

    visa_entries = details.get("visaType") if isinstance(details, dict) else None
    if not visa_entries:
        # Report this case explicitly so callers never see a failure
        # without an accompanying error message.
        result["error"] = "Visa details response did not include a visa type"
        return result

    visa_info = visa_entries[0]

    # The source HTML is malformed, so we must clean and restructure it.
    if "info" in visa_info:
        visa_info["info_html"] = self.fix_html_structure(visa_info.get("info", ""))

    if "information" in visa_info:
        visa_info["information_html"] = self.fix_html_structure(visa_info.get("information", ""))

    result["data"] = visa_info
    result["success"] = True
    return result