# streamlit-web-crawler / src / visa_scraper.py
# exorcist123's picture
# test
# 8750431
# visa_scraper.py
import requests
from typing import Dict, List, Optional, Any
from bs4 import BeautifulSoup, NavigableString
class IndonesianVisaScraper:
    """
    Scraper for Indonesian visa information from evisa.imigrasi.go.id

    All requests are form-encoded POSTs to ``BASE_URL``; the ``step`` field
    of the payload selects the operation:

    * step "0" -- sub-activities of a parent activity
    * step "1" -- visa types for an activity / country pair
    * step "2" -- details of one visa type

    The network-facing methods are best-effort: on any failure they print a
    diagnostic and return ``None`` instead of raising.
    """

    BASE_URL = "https://evisa.imigrasi.go.id/web/visa-selection/data"

    # Seconds to wait for the server before giving up on a request.
    # Without a timeout ``requests`` would block indefinitely.
    REQUEST_TIMEOUT = 30

    # Message returned when the API reports an "empty" result for step 1.
    _GUARANTOR_MESSAGE = "This type of visa must be applied by guarantor."

    # Country ID mapping (upper-case country name -> ID sent as "country_id")
    COUNTRY_MAPPING = {
        "ALBANIA": "df26b6b5-b957-44fc-8775-5a307aff676c",
        "ALGERIA": "ee9a47a3-c229-4384-a00e-c5d3132a2b6a",
        "AMERICAN SAMOA": "f57b1c29-b107-402d-a33c-2198d61dee4a",
        "ANDORRA": "7fc48e74-1a09-4edd-9bca-d6622ea82b1e",
        "ANGOLA": "2cd8b0e4-f9a4-44f5-8014-23f1f03d3193",
        "ANGUILLA": "7d9c632d-106a-40b5-b4de-d46572bdb18e",
        "ANTIGUA AND BARBUDA": "dab42168-cd9e-4e6f-86ec-a1a773d34684",
        "ARGENTINA": "7a0eaa53-353b-4206-9299-1badcdb0fdf1",
        "ARMENIA": "e7bcd2e1-fb4b-4748-8fce-b2d0e3b3827c",
        "AUSTRALIA": "46283cb1-d406-47c1-86fd-8c308ffa173a",
        "AUSTRIA": "727e690b-fc21-4b8a-b17f-1de1616433b7",
        "AZERBAIJAN": "a11144b1-8228-42d5-8242-a0f529c0de10",
        "BAHAMAS": "0685586c-7e37-4aa1-8020-f4aa453e9472",
        "BAHRAIN": "f3373174-008f-4f5c-b902-e79763719c64",
        "BANGLADESH": "a14de768-1e37-4d93-8def-352266de349a",
        "BARBADOS": "02054c21-3f67-4fe3-95bc-d20106985e49",
        "BELARUS": "9ff91215-8e06-45b4-8f88-f7685d249358",
        "BELGIUM": "94629b00-f4c4-46bd-b263-cc7c53f8b9d9",
        "BELIZE": "ada414e8-6fd5-4b69-82eb-89472e20d3be",
        "BENIN": "88a03b34-1b9e-4bd6-ba7b-2a7bd8838c24",
        "BERMUDA": "50425995-e990-47c5-8e5e-882840ef2b7a",
        "BHUTAN": "4528297b-9315-45f8-adf1-3a5f0b7e994f",
        "BOLIVIA": "bdc856ae-6948-4381-9648-8fc41bef7577",
        "BOSNIA AND HERZEGOVINA": "fa943b74-22e9-4650-bccb-14b466bde722",
        "BOTSWANA": "f98613a2-3816-4af6-8d27-e7edc2e67366",
        "BRAZIL": "cf4dfdbe-327b-4c9a-bd39-441a338cd276",
        "BRUNEI DARUSSALAM": "687653fc-236b-4bc8-baf8-88a81f17618a",
        "BULGARIA": "7345359f-4f82-4089-a153-acf32254b0a3",
        "BURKINA FASO": "83c3ceed-af1c-4b06-bb4a-0217712043c8",
        "BURUNDI": "ff78c621-fa7f-467b-812c-9912e88ac430",
        "CAMBODIA": "3f387ed5-13e6-42b8-ba77-84f281d995f7",
        "CAMEROON": "1d91caf5-bcf2-4e4b-8c77-7d62781bf220",
        "CANADA": "47db151f-ce3e-406d-b707-8fcdfd088a61",
        "CAPE VERDE": "3ecf1c2c-de95-4811-9e5e-4b03816a108a",
        "CENTRAL AFRICAN REPUBLIC": "0c72f1ee-2e31-4713-86ac-e079c0f994e7",
        "CHAD": "abbd5561-fde5-42e5-8237-382ab808ec8f",
        "CHILE": "0c597abe-9cf2-4171-8449-06bcc928afd8",
        "CHINA": "883891b6-1c3f-41c3-9279-4745f63575cb",
        "COLOMBIA": "76578a13-3e64-41da-8d91-deef561311c2",
        "COMOROS": "e350c59c-5236-4207-8ef9-88e4aebeda9d",
        "CONGO REPUBLIC": "41d426c7-b47f-4e73-a810-dc78bcaae82f",
        "COSTA RICA": "d16f1de8-e959-4569-89ab-0e165456eeb0",
        "CROATIA": "a93f74b3-fcc6-473c-89f9-0e7af7d7f6b6",
        "CUBA": "4f6e354a-ca46-49f3-b035-8188b2d1f6b6",
        "CYPRUS": "8e5508a3-0173-4be4-bdc8-23986b9179d7",
        "CZECH REPUBLIC": "318736c7-f14f-4560-a65a-8c2304ea6805",
        "DEMOCRATIC REPUBLIC OF CONGO": "7d5d09ba-f7a2-4780-a730-ac5749aeae88",
        "DENMARK": "25d567a3-7a4e-4248-9800-647028da9b71",
        "DJIBOUTI": "79c7ec19-d69c-48f4-9cbc-96110e24667d",
        "DOMINICA": "ac9f23a8-fd8e-4877-bab8-367ce26cbb06",
        "DOMINICAN REPUBLIC": "0df2afd4-1945-40c3-a3bb-bcc1fa8ab081",
        "ECUADOR": "c7339e3d-cbe9-4893-af6f-0bdc4b6ebd26",
        "EGYPT": "29a5479e-3e9a-4fe6-a4b2-705de1654fb7",
        "EL SALVADOR": "c20c9d1a-3d42-48c1-8033-12a837058ba6",
        "EQUATORIAL GUINEA": "70003abd-8f1e-4baa-b513-a406405c85bd",
        "ERITREA": "e9ab2043-0c97-4c52-b103-8118861898d0",
        "ESTONIA": "448fa696-e0ca-4e5f-89cd-671f04d48c9f",
        "ESWATINI": "033731b5-112f-49bc-a182-7b1429049dbf",
        "ETHIOPIA": "a0a10b9e-3157-4d65-84c3-e96bc8a99979",
        "FIJI": "67f91d7d-5166-44d3-b050-45fe4fa6bad7",
        "FINLAND": "52006b57-7b02-4902-b55d-0dadc577b75c",
        "FRANCE": "23e8665d-2f2c-4841-acf6-6a2cac358ed4",
        "GABON": "38381f24-d8fb-42e4-ba80-f0b1451e5577",
        "GAMBIA": "41d04ba0-6764-4b55-88ed-e1f776743128",
        "GEORGIA": "f851c74a-b9dc-4d9d-a99c-65b6a69afe95",
        "GERMANY": "2d7c66c1-9341-459c-83a8-8138766b133c",
        "GHANA": "012e51a0-4e9c-4dfd-8438-b4cc375fb0ed",
        "GREECE": "47aa18d9-ed29-4250-9206-6f0cdaec94ff",
        "GRENADA": "9d14a529-4116-449f-818f-538994ae14c8",
        "GUATEMALA": "0159ccc8-a34c-4a6d-846f-510296a43536",
        "GUINEA": "5cd3306f-e3de-4031-b1f4-fb5d07e4de70",
        "GUINEA-BISSAU": "e04e1e50-e2a8-4b11-97c3-f0c8d759af9a",
        "GUYANA": "77774317-99e1-43c3-ada2-62aaf1a22f81",
        "HAITI": "91d309f2-de3f-40bb-80e7-4b68077fdc33",
        "HONDURAS": "1fee433d-5093-4752-bd18-019be82ebcae",
        "HONG KONG SAR": "2ecd2c23-33ff-4624-80d3-900777e7801e",
        "HUNGARY": "5d2f2361-562a-4f59-84f7-a8dabb127add",
        "ICELAND": "e6ff784e-9de7-44a8-9f67-2c3beecd2efc",
        "INDIA": "f51f4255-1b0b-4a1b-be01-4bc82909c33e",
        # FIXME(review): INDONESIA and IRAN carry the same ID, so at least one
        # of the two entries is presumably wrong -- verify against the site.
        "INDONESIA": "66c0c992-6f20-4464-afa0-160e48985792",  # Note: Indonesia in the list
        "IRAN": "66c0c992-6f20-4464-afa0-160e48985792",
        "IRAQ": "d106dd98-25c4-489d-ae37-a73bdf0b4b7d",
        "IRELAND": "43f85fba-b363-4c2e-aa27-83fb1a9da94c",
        "ITALY": "a24388f2-c0a5-4807-894c-de6823a79a3f",
        "IVORY COAST": "7dc9c73c-4c60-4ebc-bd41-13010e8c2952",
        "JAMAICA": "652d4cc5-c90a-44d0-866e-94167c3dbd3b",
        "JAPAN": "1beaa902-fae3-4733-984d-437375211a3d",
        "JORDAN": "4c108286-8eb9-48dc-9590-4a0a3cfe2b40",
        "KAZAKHSTAN": "4f68af57-24fc-4da3-9524-5a344f7ade7d",
        "KENYA": "009337db-a9ad-43af-b823-c95aca6798b9",
        "KUWAIT": "6d8dc2ec-dbc0-4416-a3e1-553935ee4f23",
        "KYRGYZSTAN": "6214e7cd-b344-4698-ae88-d32b80cc26f3",
        "LAOS": "50ebf46b-2f8e-4345-bc0c-461857b06847",
        "LATVIA": "af83f9ed-713c-4ae6-a588-c3c79fe58d44",
        "LEBANON": "477fc793-7ad7-4955-af53-09d9ffd197b8",
        "LESOTHO": "ceef5a50-b212-4f03-b132-1db4fa3ee8d9",
        "LIBYA": "a6f469cc-61ca-4640-a200-350e33630a7a",
        "LIECHTENSTEIN": "97dc8df9-6382-47c4-abcb-af92e0833ab3",
        "LITHUANIA": "f0127376-b8cc-49b6-ba94-90137d5c9912",
        "LUXEMBOURG": "b2ac4801-ca60-4fd1-9a09-9de023939a17",
        "MACAO SAR": "63ad0853-32f7-444a-94d3-1db1515f6411",
        "MACEDONIA": "ca2e8749-8c9c-48ea-8dbe-1d3733ffaf70",
        "MADAGASCAR": "22f9fd8e-f208-49c9-8afd-968066319f12",
        "MALAWI": "d2a7beda-5424-4d51-ae10-f37ca2f470c5",
        "MALAYSIA": "18ea95db-04a0-4564-a871-2d89bf9b6bd5",
        "MALDIVES": "96304e76-87c2-4ea2-979d-d1dd0a3a8e09",
        "MALI": "5e7032d6-4a29-412a-b1f0-6a88556862be",
        "MALTA": "71757f61-a945-4ab0-bccd-3eba9e29ce9b",
        "MAURITANIA": "b5ec75db-b92d-4b7e-ad2b-d97d132d00cb",
        "MAURITIUS": "a8c9190a-f11a-42f6-914e-32907a9b8c50",
        "MEXICO": "89b96bc1-e4ee-433c-953a-d5c5316ad3d8",
        "MOLDOVA": "e8548c39-a894-4283-8792-1c54b601bd13",
        "MONACO": "c17c385d-793f-42bf-be59-dd910d88e345",
        "MONGOLIA": "c9464187-3364-4455-9637-a4801beb1293",
        "MONTENEGRO": "b12556a9-7ee3-43b8-8630-7b99643d2419",
        "MOROCCO": "ae28d967-9143-4a3c-88c9-32ecda8a5f14",
        "MOZAMBIQUE": "aa50af6e-ffda-421a-8b07-bbd8376ffcbe",
        "MYANMAR": "63419316-d037-4edc-ab35-d3bd8da1487c",
        "NAMIBIA": "bb48769c-3071-42bb-813e-645f18b1ff0b",
        "NEPAL": "8da9858f-bdf2-4b6b-9429-238ebec039dd",
        "NETHERLANDS": "220ac96a-b9c9-4f77-8711-677004adaee2",
        "NEW ZEALAND": "ef268b73-8b9e-478f-a459-32058e33b1f3",
        "NICARAGUA": "6206cd29-800f-46b1-8a8d-a8cd70123281",
        "NIGER": "edc16d7a-71ac-45ac-8e22-315f2ec89d96",
        # FIXME(review): NIGERIA and NORTH MACEDONIA carry the same ID, so at
        # least one of the two entries is presumably wrong -- verify.
        "NIGERIA": "f565a323-3bae-4dcb-b32e-95c6f1819c0f",
        "NORTH MACEDONIA": "f565a323-3bae-4dcb-b32e-95c6f1819c0f",
        "NORWAY": "f322b0c9-61a5-4ee8-9cdb-f046a5245e6b",
        "OMAN": "6aa75c1b-e7eb-43f8-88d7-1f0ad5764b43",
        "PAKISTAN": "d6adb2da-a3d0-4143-bcc5-1651b0671851",
        "PALAU": "90782348-c645-4400-84cb-2c4307e2d374",
        "PALESTINE": "e7b5ec63-4203-4668-a377-dd2b526682df",
        "PANAMA": "ec76dc7f-faca-4bfd-b76d-3de68e0cf280",
        "PAPUA NEW GUINEA": "275b19e4-50a1-41c7-b6d8-cfbbdb03d046",
        "PARAGUAY": "e58d82e1-a65c-410a-ac8a-866c6f25da9f",
        "PERU": "e5f942ed-23fd-4f2d-a5d4-97399753ec08",
        "PHILIPPINES": "8db55c27-d5c2-4cf7-bbaa-6db35fe3863d",
        "POLAND": "733b9eb2-8fea-49ab-9b7d-ba9d56ac0108",
        "PORTUGAL": "2fe14725-2c04-4c6a-963c-dcde840dcfd5",
        "QATAR": "ff4c5ef0-b98c-4316-93a1-401d0e503d23",
        "REPUBLIC OF KOREA": "0e63775f-7370-4203-9ff4-8a7ce1b83da5",
        "REPUBLIC OF SOUTH AFRICA": "89ee80a3-b660-44cc-b09d-9e8b02df327c",
        "ROMANIA": "cd630114-9710-49a8-a450-9eff8b9f7966",
        "RUSSIA": "c9927f33-3778-4955-a5ed-5be3fe3c2c86",
        "RWANDA": "fb80a5a2-7da4-4684-9a08-ac9935a7687c",
        "SAINT KITTS AND NEVIS": "652bdc10-1147-4da7-b87d-0a2d87f456ff",
        "SAINT LUCIA": "b72c2f44-6a70-44cc-92c9-e14f791ed3a0",
        "SAINT VINCENT AND GRENADINES": "57c54eb4-4317-4873-b20f-9eca9db38461",
        "SAMOA": "1ff2297d-ca8a-41d0-8ed3-835773306c91",
        "SAN MARINO": "1bd460aa-a6f7-4517-b557-68c9a5c4b575",
        "SAO TOME AND PRINCIPE": "a6945046-ca8d-4df0-9d85-76605233e7cd",
        "SAUDI ARABIA": "36cb6d47-cfd4-42a0-ac3d-398d76f932c3",
        "SENEGAL": "02b4584f-7c26-4b8f-8d07-7d79552045a7",
        "SERBIA": "77514d20-e823-4134-9be1-663279285570",
        "SEYCHELLES": "9a5bf6eb-bdd3-46ce-9298-325ee83c6e8a",
        "SIERRA LEONE": "536c5bbc-9d92-4831-856f-0b4183866560",
        "SINGAPORE": "76f0332d-ff00-4e69-acda-73d153c6ce66",
        "SLOVAKIA": "2c48b68b-5184-4d9f-9635-b4376855f832",
        "SLOVENIA": "95c29f1b-464b-4985-b5f2-9589e977cd65",
        "SOLOMON ISLANDS": "a6cc707d-cb28-4fff-aca0-8d7cb53e003b",
        "SOUTH SUDAN": "4a72d20d-8764-4f24-a6c5-5733637e0947",
        "SPAIN": "a7391e9c-7a1f-4184-bb40-b8d82b001643",
        "SRI LANKA": "c0d856c9-bac0-4566-aeb6-db30d16b611c",
        "SUDAN": "f5d95a4d-2857-44fe-a124-1c041aab463a",
        "SURINAME": "0697ba08-aea2-40cb-b54a-4dddf66cc568",
        "SWEDEN": "5285ce98-204f-42c7-9a8b-0ce793992540",
        "SWITZERLAND": "4b8a01ac-ebac-4157-ba96-52a42452b8fc",
        "SYRIA": "a4c2a6b9-599e-4f32-a2cb-a7a8c419d3fd",
        "TAIWAN": "cab7f051-6e1e-40e8-bc1d-304c7d718205",
        "TAJIKISTAN": "4e06dd54-cb40-4689-ba37-d52ee18b6232",
        "TANZANIA": "781d5b7f-8d72-4ab6-bb85-9d84c7002655",
        "THAILAND": "568938de-3757-4038-ab10-9515de53dcde",
        "TIMOR LESTE": "92991bf5-3eb4-4bcd-a9cd-eb3791f61855",
        "TOGO": "4472c324-c691-41ef-993f-1fdc68bdaeb4",
        "TONGA": "5da97514-d2f2-49e2-8b3c-c2cd837076c9",
        "TRINIDAD AND TOBAGO": "7052f57f-ed0f-4af7-a2bd-c56add55c58b",
        "TUNISIA": "d43d13b8-c1f6-4b99-96fc-46f8566cf2f8",
        "TURKEY": "ef010e72-e3d1-47af-81eb-c801725dfd56",
        "TURKMENISTAN": "d86a010c-3d78-4e3c-a0a2-a8c28ba2faa5",
        "UGANDA": "77291ff1-eab3-4a48-b4f8-a554edb33c54",
        "UKRAINE": "e01dfda0-5841-4395-8073-995bac33530c",
        "UNITED ARAB EMIRATES": "97ed438c-9faf-4f9c-a407-72e3718ca022",
        "UNITED KINGDOM": "94ce109e-b072-4229-9285-cf9cacc8fb2d",
        "UNITED STATES OF AMERICA": "9b23d98e-5f93-47c0-bc86-7c01185dc7a7",
        "USA": "9b23d98e-5f93-47c0-bc86-7c01185dc7a7",  # Alias
        "URUGUAY": "2b5383fd-857a-4d13-9da2-6cc8817d94e4",
        "UZBEKISTAN": "78743012-4d5f-499c-b1f7-87a30df735f9",
        "VANUATU": "402de03b-eba7-4fca-ab14-5a1c8efeeec7",
        "VENEZUELA": "41b2e501-26aa-4787-97fa-56af1e94b82c",
        "VIETNAM": "e3838f92-2f1d-449c-b6b8-76b089027f0a",
        "YEMEN": "e8324fd1-9cee-4772-97af-651408c0bab9",
        "ZAMBIA": "16543ea3-11bd-4551-9d9e-44abbec77c25",
        "ZIMBABWE": "e0ea4fa1-498e-4d95-89eb-24415907dcab",
    }

    # Parent Activity mapping (Step 0): display name -> ID sent as "parent_id"
    PARENT_ACTIVITY_MAPPING = {
        "General, Family, or Social": "d5bc2168-2f4a-4396-8eae-3d895a0508e9",
        "Investment, Business, or Government": "f7a8ac1d-a71f-45d3-919f-985e295533f2",
        "Golden Visa": "63350ecf-72a1-4fd1-8674-d42f815615fd",
        "Diaspora": "58d2cbb5-423a-4f8e-8e3c-bcddd9f7980e",
        "Silver Hair & Retirement": "ad33081b-7c1d-4c07-a7ed-aa36f0b54bb3",
        "Second Home": "1bb683e9-bb81-4a85-9651-ba4d4174ff0e",
        "Professional or Employment": "40d4fdc7-4117-48ff-9ed6-3950088fc760",
        "Journalist or Film": "01c17cd8-912b-457f-b3cc-d8919ab8964b",
        "Sport or Performer": "ec91d849-02c7-4d1d-831b-ac0764ab8cc5",
        "Study, Courses, Training or Research": "2221fe46-ea42-4d2f-a332-899e60ef6fe2",
        "Work and Holiday": "f9b4e188-f90e-4f3d-bace-71c2c27d5159",
    }

    def __init__(self):
        """Set up the browser-like headers sent with every request."""
        self.headers = {
            "Accept": "application/json, text/plain, */*",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Referer": "https://evisa.imigrasi.go.id/",
            "Origin": "https://evisa.imigrasi.go.id",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
        }

    def _post(self, payload: Dict[str, str]) -> Any:
        """
        POST ``payload`` as form data to ``BASE_URL`` and return the decoded JSON.

        Raises whatever ``requests`` raises on connection errors, timeouts,
        non-2xx statuses or an undecodable body; callers turn that into ``None``.
        """
        response = requests.post(
            self.BASE_URL,
            data=payload,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def get_sub_activities(self, parent_activity_id: str) -> Optional[List[Dict]]:
        """
        Step 0: Get sub-activities for a parent activity

        Returns the value stored under the response's "data" key, or ``None``
        when the request fails or the response is not a dict with that key.
        """
        payload = {
            "parent_id": parent_activity_id,
            "step": "0",
        }
        try:
            data = self._post(payload)
        except Exception as e:
            print(f"Error fetching sub-activities: {e}")
            return None
        # The API does not always answer with a dict, so check before indexing.
        if isinstance(data, dict) and "data" in data:
            return data["data"]
        return None

    @staticmethod
    def _summarize_block_after(strong_tag) -> str:
        """Return the cleaned text of the block that follows a heading tag."""
        # Navigate up to the top-level ancestor of the heading
        top_block = strong_tag
        while top_block.parent and top_block.parent.name != '[document]':
            top_block = top_block.parent

        # The value is in the next sibling block
        value_block = top_block.find_next_sibling()
        if value_block is None:
            return "Not specified"

        list_items = value_block.find_all('li')
        if list_items:
            # A list value: clean each non-empty item, one "- item" per line
            texts = (item.get_text(strip=True) for item in list_items)
            cleaned = [t.replace('(Extendable)', '').strip() for t in texts if t]
            return "\n" + "\n".join(f"- {item}" for item in cleaned)

        # Otherwise use the text of the whole block
        raw_text = value_block.get_text(separator=' ', strip=True)
        return raw_text.replace('(Extendable)', '').strip()

    def _extract_stay_and_cost_from_html(self, html_content: str) -> Dict[str, Optional[str]]:
        """
        Parses raw HTML to find and extract summary text for Stay and Cost.

        Every ``<strong>`` whose text contains "stay" (checked first) or "cost"
        is treated as a heading; its value is the block following the heading's
        top-level ancestor. Later matching headings overwrite earlier ones.
        Keys without a matching heading stay ``None``.
        """
        summary: Dict[str, Optional[str]] = {"stay": None, "cost": None}
        if not html_content:
            return summary

        soup = BeautifulSoup(html_content, 'html.parser')
        for strong_tag in soup.find_all('strong'):
            heading_text = strong_tag.get_text(strip=True).lower()
            if 'stay' in heading_text:
                key = 'stay'
            elif 'cost' in heading_text:
                key = 'cost'
            else:
                # Not a heading we summarise; skip the sibling lookup entirely.
                continue
            summary[key] = self._summarize_block_after(strong_tag)
        return summary

    def fix_html_structure(self, html_content: str) -> str:
        """
        Cleans and intelligently restructures malformed HTML from the source API.

        Top-level nodes are grouped so that each node containing a ``<strong>``
        starts a new group together with the nodes that follow it; every group
        becomes one ``<li>`` and the result is wrapped in a single ``<ul>``.
        Returns "" for empty input.
        """
        if not html_content:
            return ""

        soup = BeautifulSoup(html_content, 'html.parser')
        list_items: List[str] = []
        pending: List[Any] = []

        def is_heading(element) -> bool:
            """Heuristic: a tag is a heading when it contains a <strong>."""
            if isinstance(element, NavigableString):
                return False
            return element.find('strong') is not None

        def flush_pending() -> None:
            """Emit the pending nodes as a single <li> and reset the buffer."""
            if pending:
                content = "".join(str(el) for el in pending)
                list_items.append(f"<li>{content}</li>")
                pending.clear()

        for element in soup.contents:
            if not str(element).strip():  # Skip empty whitespace nodes
                continue
            # A new heading closes the group collected so far
            if is_heading(element) and pending:
                flush_pending()
            pending.append(element)
        flush_pending()  # Flush the last remaining group

        # Clean up known stray tags (kept exactly as the original author had it)
        rebuilt_html = "".join(list_items).replace('</ol></li>', '</ol>')
        return f"<ul>{rebuilt_html}</ul>"

    def _empty_visa_result(self) -> Dict[str, str]:
        """Build the result returned when the API reports no visa types."""
        return {"status": "empty", "message": self._GUARANTOR_MESSAGE}

    def _attach_summaries(self, visa: Dict[str, Any]) -> None:
        """Add 'stay_summary' and 'cost_summary' to ``visa`` (one step-2 request)."""
        details = self.get_visa_details(visa['id'])
        entries = details.get("visaType") if isinstance(details, dict) else None
        if entries:
            summary = self._extract_stay_and_cost_from_html(entries[0].get("info", ""))
            visa['stay_summary'] = summary.get('stay')
            visa['cost_summary'] = summary.get('cost')
        else:
            visa['stay_summary'] = "N/A"
            visa['cost_summary'] = "N/A"

    def get_visa_types(self, activity_id: str, country_id: str) -> Optional[Dict]:
        """
        Step 1: Get available visa types for activity and country

        On a "success" response every visa entry is enriched with
        'stay_summary' and 'cost_summary' (one extra request per entry) and
        the whole response dict is returned. An "empty" response, in either
        its dict or list form, yields ``{"status": "empty", "message": ...}``.
        Anything else, including any error, yields ``None``.
        """
        payload = {
            "activity_id": activity_id,
            "country_id": country_id,
            "step": "1",
        }
        # Bound before the try so the error message can always reference it,
        # even when the request itself is what failed.
        data: Any = None
        try:
            data = self._post(payload)
            # Handle both dict and list responses
            if isinstance(data, dict):
                status = data.get("status")
                if status == "success":
                    visa_list = data.get("data") or []
                    for visa in visa_list:
                        self._attach_summaries(visa)
                    data['data'] = visa_list
                    return data
                if status == "empty":
                    return self._empty_visa_result()
            elif isinstance(data, list):
                # API returns ["status","empty",false] for empty responses
                if len(data) >= 2 and data[1] == "empty":
                    return self._empty_visa_result()
            return None
        except Exception as e:
            print(f"Error fetching visa types: {e} {data}")
            return None

    def get_visa_details(self, visa_type_id: str) -> Optional[Dict]:
        """
        Step 2: Get detailed information for a specific visa type

        Returns the response's "data" value when the status is "success",
        otherwise ``None`` (also on any request error).
        """
        payload = {
            "visa_type_id": visa_type_id,
            "step": "2",
        }
        try:
            data = self._post(payload)
        except Exception as e:
            print(f"Error fetching visa details: {e}")
            return None
        # Non-dict answers (e.g. the list form used for "empty") carry no details.
        if isinstance(data, dict) and data.get("status") == "success":
            return data.get("data")
        return None

    def get_country_id(self, country_name: str) -> Optional[str]:
        """
        Get country ID from country name

        The lookup ignores case and surrounding whitespace; returns ``None``
        for unknown, empty or missing names.
        """
        if not country_name:
            return None
        return self.COUNTRY_MAPPING.get(country_name.strip().upper())

    def get_parent_activity_id(self, activity_name: str) -> Optional[str]:
        """
        Get parent activity ID from activity name

        An exact match wins; otherwise the name is compared ignoring case and
        surrounding whitespace. Returns ``None`` when nothing matches.
        """
        if not activity_name:
            return None
        exact = self.PARENT_ACTIVITY_MAPPING.get(activity_name)
        if exact is not None:
            return exact
        wanted = activity_name.strip().casefold()
        for name, activity_id in self.PARENT_ACTIVITY_MAPPING.items():
            if name.casefold() == wanted:
                return activity_id
        return None

    def get_full_visa_info(self, country_name: str, parent_activity_name: str,
                           sub_activity_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Get complete visa information through the entire flow

        Returns a dict with keys "success", "country", "parent_activity",
        "data" and "error". On success "data" holds "sub_activities",
        "visa_types" and "selected_sub_activity", plus "all_visa_info" or
        "message" when ``sub_activity_id`` was given and step 1 answered.
        """
        result: Dict[str, Any] = {
            "success": False,
            "country": country_name,
            "parent_activity": parent_activity_name,
            "data": None,
            "error": None,
        }

        # Get country ID
        country_id = self.get_country_id(country_name)
        if not country_id:
            result["error"] = f"Country '{country_name}' not found"
            return result

        # Get parent activity ID
        parent_activity_id = self.get_parent_activity_id(parent_activity_name)
        if not parent_activity_id:
            result["error"] = f"Activity '{parent_activity_name}' not found"
            return result

        # Step 0: Get sub-activities
        sub_activities = self.get_sub_activities(parent_activity_id)
        if not sub_activities:
            result["error"] = "Failed to fetch sub-activities"
            return result

        info: Dict[str, Any] = {
            "sub_activities": sub_activities,
            "visa_types": [],
            "selected_sub_activity": None,
        }
        result["data"] = info

        # If sub_activity_id provided, get visa types
        if sub_activity_id:
            visa_types_data = self.get_visa_types(sub_activity_id, country_id)
            # A failed step 1 (None) leaves "visa_types" empty; the overall
            # call is still reported as successful, as before.
            if visa_types_data:
                if visa_types_data.get("status") == "empty":
                    info["message"] = visa_types_data.get("message")
                else:
                    info["visa_types"] = visa_types_data.get("data", [])
                    info["all_visa_info"] = visa_types_data.get("all", [])
            info["selected_sub_activity"] = sub_activity_id

        result["success"] = True
        return result

    def get_visa_full_details(self, visa_type_id: str) -> Dict[str, Any]:
        """
        Get complete details for a specific visa type

        Returns a dict with keys "success", "data" and "error". On success
        "data" is the first "visaType" entry, extended with "info_html" /
        "information_html" (restructured copies of "info" / "information")
        when those fields are present.
        """
        result: Dict[str, Any] = {
            "success": False,
            "data": None,
            "error": None,
        }

        details = self.get_visa_details(visa_type_id)
        if not details:
            result["error"] = "Failed to fetch visa details"
            return result

        entries = details.get("visaType") if isinstance(details, dict) else None
        if not entries:
            # Previously this case returned success=False with no explanation.
            result["error"] = "Visa details contained no visa type information"
            return result

        visa_info = entries[0]
        # The source HTML is malformed, so we must clean and restructure it.
        if "info" in visa_info:
            visa_info["info_html"] = self.fix_html_structure(visa_info.get("info", ""))
        if "information" in visa_info:
            visa_info["information_html"] = self.fix_html_structure(visa_info.get("information", ""))

        result["data"] = visa_info
        result["success"] = True
        return result