Spaces:
Sleeping
Sleeping
| # visa_scraper.py | |
| import requests | |
| from typing import Dict, List, Optional, Any | |
| from bs4 import BeautifulSoup, NavigableString | |
class IndonesianVisaScraper:
    """
    Scraper for Indonesian visa information from evisa.imigrasi.go.id

    Every lookup is a form-encoded POST to ``BASE_URL`` whose ``step`` field
    selects the stage of the visa-selection flow:

    * step "0" - sub-activities of a parent activity
    * step "1" - visa types for a sub-activity and a country
    * step "2" - details of a single visa type

    The network methods never raise on request/HTTP/JSON failures; they print
    an ``Error fetching ...`` line and return ``None`` instead.
    """

    BASE_URL = "https://evisa.imigrasi.go.id/web/visa-selection/data"

    # Seconds to wait for the server before giving up. Without an explicit
    # timeout ``requests`` waits indefinitely.
    REQUEST_TIMEOUT = 30

    # Message returned when the API reports an "empty" visa-type list.
    _GUARANTOR_MESSAGE = "This type of visa must be applied by guarantor."

    # Country ID mapping (upper-case country name -> country ID sent as
    # ``country_id`` in step 1).
    COUNTRY_MAPPING = {
        "ALBANIA": "df26b6b5-b957-44fc-8775-5a307aff676c",
        "ALGERIA": "ee9a47a3-c229-4384-a00e-c5d3132a2b6a",
        "AMERICAN SAMOA": "f57b1c29-b107-402d-a33c-2198d61dee4a",
        "ANDORRA": "7fc48e74-1a09-4edd-9bca-d6622ea82b1e",
        "ANGOLA": "2cd8b0e4-f9a4-44f5-8014-23f1f03d3193",
        "ANGUILLA": "7d9c632d-106a-40b5-b4de-d46572bdb18e",
        "ANTIGUA AND BARBUDA": "dab42168-cd9e-4e6f-86ec-a1a773d34684",
        "ARGENTINA": "7a0eaa53-353b-4206-9299-1badcdb0fdf1",
        "ARMENIA": "e7bcd2e1-fb4b-4748-8fce-b2d0e3b3827c",
        "AUSTRALIA": "46283cb1-d406-47c1-86fd-8c308ffa173a",
        "AUSTRIA": "727e690b-fc21-4b8a-b17f-1de1616433b7",
        "AZERBAIJAN": "a11144b1-8228-42d5-8242-a0f529c0de10",
        "BAHAMAS": "0685586c-7e37-4aa1-8020-f4aa453e9472",
        "BAHRAIN": "f3373174-008f-4f5c-b902-e79763719c64",
        "BANGLADESH": "a14de768-1e37-4d93-8def-352266de349a",
        "BARBADOS": "02054c21-3f67-4fe3-95bc-d20106985e49",
        "BELARUS": "9ff91215-8e06-45b4-8f88-f7685d249358",
        "BELGIUM": "94629b00-f4c4-46bd-b263-cc7c53f8b9d9",
        "BELIZE": "ada414e8-6fd5-4b69-82eb-89472e20d3be",
        "BENIN": "88a03b34-1b9e-4bd6-ba7b-2a7bd8838c24",
        "BERMUDA": "50425995-e990-47c5-8e5e-882840ef2b7a",
        "BHUTAN": "4528297b-9315-45f8-adf1-3a5f0b7e994f",
        "BOLIVIA": "bdc856ae-6948-4381-9648-8fc41bef7577",
        "BOSNIA AND HERZEGOVINA": "fa943b74-22e9-4650-bccb-14b466bde722",
        "BOTSWANA": "f98613a2-3816-4af6-8d27-e7edc2e67366",
        "BRAZIL": "cf4dfdbe-327b-4c9a-bd39-441a338cd276",
        "BRUNEI DARUSSALAM": "687653fc-236b-4bc8-baf8-88a81f17618a",
        "BULGARIA": "7345359f-4f82-4089-a153-acf32254b0a3",
        "BURKINA FASO": "83c3ceed-af1c-4b06-bb4a-0217712043c8",
        "BURUNDI": "ff78c621-fa7f-467b-812c-9912e88ac430",
        "CAMBODIA": "3f387ed5-13e6-42b8-ba77-84f281d995f7",
        "CAMEROON": "1d91caf5-bcf2-4e4b-8c77-7d62781bf220",
        "CANADA": "47db151f-ce3e-406d-b707-8fcdfd088a61",
        "CAPE VERDE": "3ecf1c2c-de95-4811-9e5e-4b03816a108a",
        "CENTRAL AFRICAN REPUBLIC": "0c72f1ee-2e31-4713-86ac-e079c0f994e7",
        "CHAD": "abbd5561-fde5-42e5-8237-382ab808ec8f",
        "CHILE": "0c597abe-9cf2-4171-8449-06bcc928afd8",
        "CHINA": "883891b6-1c3f-41c3-9279-4745f63575cb",
        "COLOMBIA": "76578a13-3e64-41da-8d91-deef561311c2",
        "COMOROS": "e350c59c-5236-4207-8ef9-88e4aebeda9d",
        "CONGO REPUBLIC": "41d426c7-b47f-4e73-a810-dc78bcaae82f",
        "COSTA RICA": "d16f1de8-e959-4569-89ab-0e165456eeb0",
        "CROATIA": "a93f74b3-fcc6-473c-89f9-0e7af7d7f6b6",
        "CUBA": "4f6e354a-ca46-49f3-b035-8188b2d1f6b6",
        "CYPRUS": "8e5508a3-0173-4be4-bdc8-23986b9179d7",
        "CZECH REPUBLIC": "318736c7-f14f-4560-a65a-8c2304ea6805",
        "DEMOCRATIC REPUBLIC OF CONGO": "7d5d09ba-f7a2-4780-a730-ac5749aeae88",
        "DENMARK": "25d567a3-7a4e-4248-9800-647028da9b71",
        "DJIBOUTI": "79c7ec19-d69c-48f4-9cbc-96110e24667d",
        "DOMINICA": "ac9f23a8-fd8e-4877-bab8-367ce26cbb06",
        "DOMINICAN REPUBLIC": "0df2afd4-1945-40c3-a3bb-bcc1fa8ab081",
        "ECUADOR": "c7339e3d-cbe9-4893-af6f-0bdc4b6ebd26",
        "EGYPT": "29a5479e-3e9a-4fe6-a4b2-705de1654fb7",
        "EL SALVADOR": "c20c9d1a-3d42-48c1-8033-12a837058ba6",
        "EQUATORIAL GUINEA": "70003abd-8f1e-4baa-b513-a406405c85bd",
        "ERITREA": "e9ab2043-0c97-4c52-b103-8118861898d0",
        "ESTONIA": "448fa696-e0ca-4e5f-89cd-671f04d48c9f",
        "ESWATINI": "033731b5-112f-49bc-a182-7b1429049dbf",
        "ETHIOPIA": "a0a10b9e-3157-4d65-84c3-e96bc8a99979",
        "FIJI": "67f91d7d-5166-44d3-b050-45fe4fa6bad7",
        "FINLAND": "52006b57-7b02-4902-b55d-0dadc577b75c",
        "FRANCE": "23e8665d-2f2c-4841-acf6-6a2cac358ed4",
        "GABON": "38381f24-d8fb-42e4-ba80-f0b1451e5577",
        "GAMBIA": "41d04ba0-6764-4b55-88ed-e1f776743128",
        "GEORGIA": "f851c74a-b9dc-4d9d-a99c-65b6a69afe95",
        "GERMANY": "2d7c66c1-9341-459c-83a8-8138766b133c",
        "GHANA": "012e51a0-4e9c-4dfd-8438-b4cc375fb0ed",
        "GREECE": "47aa18d9-ed29-4250-9206-6f0cdaec94ff",
        "GRENADA": "9d14a529-4116-449f-818f-538994ae14c8",
        "GUATEMALA": "0159ccc8-a34c-4a6d-846f-510296a43536",
        "GUINEA": "5cd3306f-e3de-4031-b1f4-fb5d07e4de70",
        "GUINEA-BISSAU": "e04e1e50-e2a8-4b11-97c3-f0c8d759af9a",
        "GUYANA": "77774317-99e1-43c3-ada2-62aaf1a22f81",
        "HAITI": "91d309f2-de3f-40bb-80e7-4b68077fdc33",
        "HONDURAS": "1fee433d-5093-4752-bd18-019be82ebcae",
        "HONG KONG SAR": "2ecd2c23-33ff-4624-80d3-900777e7801e",
        "HUNGARY": "5d2f2361-562a-4f59-84f7-a8dabb127add",
        "ICELAND": "e6ff784e-9de7-44a8-9f67-2c3beecd2efc",
        "INDIA": "f51f4255-1b0b-4a1b-be01-4bc82909c33e",
        # NOTE(review): "INDONESIA" and "IRAN" carry the same ID, so one of
        # the two is presumably a copy-paste error - verify against the site.
        "INDONESIA": "66c0c992-6f20-4464-afa0-160e48985792",
        "IRAN": "66c0c992-6f20-4464-afa0-160e48985792",
        "IRAQ": "d106dd98-25c4-489d-ae37-a73bdf0b4b7d",
        "IRELAND": "43f85fba-b363-4c2e-aa27-83fb1a9da94c",
        "ITALY": "a24388f2-c0a5-4807-894c-de6823a79a3f",
        "IVORY COAST": "7dc9c73c-4c60-4ebc-bd41-13010e8c2952",
        "JAMAICA": "652d4cc5-c90a-44d0-866e-94167c3dbd3b",
        "JAPAN": "1beaa902-fae3-4733-984d-437375211a3d",
        "JORDAN": "4c108286-8eb9-48dc-9590-4a0a3cfe2b40",
        "KAZAKHSTAN": "4f68af57-24fc-4da3-9524-5a344f7ade7d",
        "KENYA": "009337db-a9ad-43af-b823-c95aca6798b9",
        "KUWAIT": "6d8dc2ec-dbc0-4416-a3e1-553935ee4f23",
        "KYRGYZSTAN": "6214e7cd-b344-4698-ae88-d32b80cc26f3",
        "LAOS": "50ebf46b-2f8e-4345-bc0c-461857b06847",
        "LATVIA": "af83f9ed-713c-4ae6-a588-c3c79fe58d44",
        "LEBANON": "477fc793-7ad7-4955-af53-09d9ffd197b8",
        "LESOTHO": "ceef5a50-b212-4f03-b132-1db4fa3ee8d9",
        "LIBYA": "a6f469cc-61ca-4640-a200-350e33630a7a",
        "LIECHTENSTEIN": "97dc8df9-6382-47c4-abcb-af92e0833ab3",
        "LITHUANIA": "f0127376-b8cc-49b6-ba94-90137d5c9912",
        "LUXEMBOURG": "b2ac4801-ca60-4fd1-9a09-9de023939a17",
        "MACAO SAR": "63ad0853-32f7-444a-94d3-1db1515f6411",
        "MACEDONIA": "ca2e8749-8c9c-48ea-8dbe-1d3733ffaf70",
        "MADAGASCAR": "22f9fd8e-f208-49c9-8afd-968066319f12",
        "MALAWI": "d2a7beda-5424-4d51-ae10-f37ca2f470c5",
        "MALAYSIA": "18ea95db-04a0-4564-a871-2d89bf9b6bd5",
        "MALDIVES": "96304e76-87c2-4ea2-979d-d1dd0a3a8e09",
        "MALI": "5e7032d6-4a29-412a-b1f0-6a88556862be",
        "MALTA": "71757f61-a945-4ab0-bccd-3eba9e29ce9b",
        "MAURITANIA": "b5ec75db-b92d-4b7e-ad2b-d97d132d00cb",
        "MAURITIUS": "a8c9190a-f11a-42f6-914e-32907a9b8c50",
        "MEXICO": "89b96bc1-e4ee-433c-953a-d5c5316ad3d8",
        "MOLDOVA": "e8548c39-a894-4283-8792-1c54b601bd13",
        "MONACO": "c17c385d-793f-42bf-be59-dd910d88e345",
        "MONGOLIA": "c9464187-3364-4455-9637-a4801beb1293",
        "MONTENEGRO": "b12556a9-7ee3-43b8-8630-7b99643d2419",
        "MOROCCO": "ae28d967-9143-4a3c-88c9-32ecda8a5f14",
        "MOZAMBIQUE": "aa50af6e-ffda-421a-8b07-bbd8376ffcbe",
        "MYANMAR": "63419316-d037-4edc-ab35-d3bd8da1487c",
        "NAMIBIA": "bb48769c-3071-42bb-813e-645f18b1ff0b",
        "NEPAL": "8da9858f-bdf2-4b6b-9429-238ebec039dd",
        "NETHERLANDS": "220ac96a-b9c9-4f77-8711-677004adaee2",
        "NEW ZEALAND": "ef268b73-8b9e-478f-a459-32058e33b1f3",
        "NICARAGUA": "6206cd29-800f-46b1-8a8d-a8cd70123281",
        "NIGER": "edc16d7a-71ac-45ac-8e22-315f2ec89d96",
        # NOTE(review): "NIGERIA" and "NORTH MACEDONIA" carry the same ID
        # (and differ from "MACEDONIA"), so one of the two is presumably a
        # copy-paste error - verify against the site.
        "NIGERIA": "f565a323-3bae-4dcb-b32e-95c6f1819c0f",
        "NORTH MACEDONIA": "f565a323-3bae-4dcb-b32e-95c6f1819c0f",
        "NORWAY": "f322b0c9-61a5-4ee8-9cdb-f046a5245e6b",
        "OMAN": "6aa75c1b-e7eb-43f8-88d7-1f0ad5764b43",
        "PAKISTAN": "d6adb2da-a3d0-4143-bcc5-1651b0671851",
        "PALAU": "90782348-c645-4400-84cb-2c4307e2d374",
        "PALESTINE": "e7b5ec63-4203-4668-a377-dd2b526682df",
        "PANAMA": "ec76dc7f-faca-4bfd-b76d-3de68e0cf280",
        "PAPUA NEW GUINEA": "275b19e4-50a1-41c7-b6d8-cfbbdb03d046",
        "PARAGUAY": "e58d82e1-a65c-410a-ac8a-866c6f25da9f",
        "PERU": "e5f942ed-23fd-4f2d-a5d4-97399753ec08",
        "PHILIPPINES": "8db55c27-d5c2-4cf7-bbaa-6db35fe3863d",
        "POLAND": "733b9eb2-8fea-49ab-9b7d-ba9d56ac0108",
        "PORTUGAL": "2fe14725-2c04-4c6a-963c-dcde840dcfd5",
        "QATAR": "ff4c5ef0-b98c-4316-93a1-401d0e503d23",
        "REPUBLIC OF KOREA": "0e63775f-7370-4203-9ff4-8a7ce1b83da5",
        "REPUBLIC OF SOUTH AFRICA": "89ee80a3-b660-44cc-b09d-9e8b02df327c",
        "ROMANIA": "cd630114-9710-49a8-a450-9eff8b9f7966",
        "RUSSIA": "c9927f33-3778-4955-a5ed-5be3fe3c2c86",
        "RWANDA": "fb80a5a2-7da4-4684-9a08-ac9935a7687c",
        "SAINT KITTS AND NEVIS": "652bdc10-1147-4da7-b87d-0a2d87f456ff",
        "SAINT LUCIA": "b72c2f44-6a70-44cc-92c9-e14f791ed3a0",
        "SAINT VINCENT AND GRENADINES": "57c54eb4-4317-4873-b20f-9eca9db38461",
        "SAMOA": "1ff2297d-ca8a-41d0-8ed3-835773306c91",
        "SAN MARINO": "1bd460aa-a6f7-4517-b557-68c9a5c4b575",
        "SAO TOME AND PRINCIPE": "a6945046-ca8d-4df0-9d85-76605233e7cd",
        "SAUDI ARABIA": "36cb6d47-cfd4-42a0-ac3d-398d76f932c3",
        "SENEGAL": "02b4584f-7c26-4b8f-8d07-7d79552045a7",
        "SERBIA": "77514d20-e823-4134-9be1-663279285570",
        "SEYCHELLES": "9a5bf6eb-bdd3-46ce-9298-325ee83c6e8a",
        "SIERRA LEONE": "536c5bbc-9d92-4831-856f-0b4183866560",
        "SINGAPORE": "76f0332d-ff00-4e69-acda-73d153c6ce66",
        "SLOVAKIA": "2c48b68b-5184-4d9f-9635-b4376855f832",
        "SLOVENIA": "95c29f1b-464b-4985-b5f2-9589e977cd65",
        "SOLOMON ISLANDS": "a6cc707d-cb28-4fff-aca0-8d7cb53e003b",
        "SOUTH SUDAN": "4a72d20d-8764-4f24-a6c5-5733637e0947",
        "SPAIN": "a7391e9c-7a1f-4184-bb40-b8d82b001643",
        "SRI LANKA": "c0d856c9-bac0-4566-aeb6-db30d16b611c",
        "SUDAN": "f5d95a4d-2857-44fe-a124-1c041aab463a",
        "SURINAME": "0697ba08-aea2-40cb-b54a-4dddf66cc568",
        "SWEDEN": "5285ce98-204f-42c7-9a8b-0ce793992540",
        "SWITZERLAND": "4b8a01ac-ebac-4157-ba96-52a42452b8fc",
        "SYRIA": "a4c2a6b9-599e-4f32-a2cb-a7a8c419d3fd",
        "TAIWAN": "cab7f051-6e1e-40e8-bc1d-304c7d718205",
        "TAJIKISTAN": "4e06dd54-cb40-4689-ba37-d52ee18b6232",
        "TANZANIA": "781d5b7f-8d72-4ab6-bb85-9d84c7002655",
        "THAILAND": "568938de-3757-4038-ab10-9515de53dcde",
        "TIMOR LESTE": "92991bf5-3eb4-4bcd-a9cd-eb3791f61855",
        "TOGO": "4472c324-c691-41ef-993f-1fdc68bdaeb4",
        "TONGA": "5da97514-d2f2-49e2-8b3c-c2cd837076c9",
        "TRINIDAD AND TOBAGO": "7052f57f-ed0f-4af7-a2bd-c56add55c58b",
        "TUNISIA": "d43d13b8-c1f6-4b99-96fc-46f8566cf2f8",
        "TURKEY": "ef010e72-e3d1-47af-81eb-c801725dfd56",
        "TURKMENISTAN": "d86a010c-3d78-4e3c-a0a2-a8c28ba2faa5",
        "UGANDA": "77291ff1-eab3-4a48-b4f8-a554edb33c54",
        "UKRAINE": "e01dfda0-5841-4395-8073-995bac33530c",
        "UNITED ARAB EMIRATES": "97ed438c-9faf-4f9c-a407-72e3718ca022",
        "UNITED KINGDOM": "94ce109e-b072-4229-9285-cf9cacc8fb2d",
        "UNITED STATES OF AMERICA": "9b23d98e-5f93-47c0-bc86-7c01185dc7a7",
        "USA": "9b23d98e-5f93-47c0-bc86-7c01185dc7a7",  # Alias
        "URUGUAY": "2b5383fd-857a-4d13-9da2-6cc8817d94e4",
        "UZBEKISTAN": "78743012-4d5f-499c-b1f7-87a30df735f9",
        "VANUATU": "402de03b-eba7-4fca-ab14-5a1c8efeeec7",
        "VENEZUELA": "41b2e501-26aa-4787-97fa-56af1e94b82c",
        "VIETNAM": "e3838f92-2f1d-449c-b6b8-76b089027f0a",
        "YEMEN": "e8324fd1-9cee-4772-97af-651408c0bab9",
        "ZAMBIA": "16543ea3-11bd-4551-9d9e-44abbec77c25",
        "ZIMBABWE": "e0ea4fa1-498e-4d95-89eb-24415907dcab"
    }

    # Parent Activity mapping (Step 0): display name -> ID sent as
    # ``parent_id``.
    PARENT_ACTIVITY_MAPPING = {
        "General, Family, or Social": "d5bc2168-2f4a-4396-8eae-3d895a0508e9",
        "Investment, Business, or Government": "f7a8ac1d-a71f-45d3-919f-985e295533f2",
        "Golden Visa": "63350ecf-72a1-4fd1-8674-d42f815615fd",
        "Diaspora": "58d2cbb5-423a-4f8e-8e3c-bcddd9f7980e",
        "Silver Hair & Retirement": "ad33081b-7c1d-4c07-a7ed-aa36f0b54bb3",
        "Second Home": "1bb683e9-bb81-4a85-9651-ba4d4174ff0e",
        "Professional or Employment": "40d4fdc7-4117-48ff-9ed6-3950088fc760",
        "Journalist or Film": "01c17cd8-912b-457f-b3cc-d8919ab8964b",
        "Sport or Performer": "ec91d849-02c7-4d1d-831b-ac0764ab8cc5",
        "Study, Courses, Training or Research": "2221fe46-ea42-4d2f-a332-899e60ef6fe2",
        "Work and Holiday": "f9b4e188-f90e-4f3d-bace-71c2c27d5159"
    }

    def __init__(self):
        """Prepare the HTTP headers sent with every request."""
        self.headers = {
            "Accept": "application/json, text/plain, */*",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Referer": "https://evisa.imigrasi.go.id/",
            "Origin": "https://evisa.imigrasi.go.id",
            "Accept-Language": "en-US,en;q=0.9",
            # "br" is deliberately not advertised: requests only decodes
            # brotli when an optional brotli package is installed, and an
            # undecoded body would make the JSON parsing fail.
            "Accept-Encoding": "gzip, deflate"
        }

    def _post_json(self, payload: Dict[str, str], action: str) -> Any:
        """
        POST ``payload`` as form data to ``BASE_URL`` and decode the JSON reply.

        Args:
            payload: Form fields for the request (always includes ``step``).
            action: Human-readable label used in the error line.

        Returns:
            The decoded JSON value, or ``None`` when the request failed, the
            server answered with an HTTP error status, or the body was not
            valid JSON. In those cases ``Error fetching <action>: ...`` is
            printed.
        """
        try:
            response = requests.post(
                self.BASE_URL,
                data=payload,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching {action}: {e}")
            return None

    @staticmethod
    def _first_visa_type(details: Any) -> Optional[Dict[str, Any]]:
        """Return the first entry of ``details["visaType"]`` if it is a dict, else None."""
        if not isinstance(details, dict):
            return None
        visa_types = details.get("visaType")
        if isinstance(visa_types, list) and visa_types and isinstance(visa_types[0], dict):
            return visa_types[0]
        return None

    def get_sub_activities(self, parent_activity_id: str) -> Optional[List[Dict]]:
        """
        Step 0: Get sub-activities for a parent activity

        Returns:
            The ``data`` member of the response, or ``None`` when the request
            failed or the response is not a dict containing ``data``.
        """
        payload = {
            "parent_id": parent_activity_id,
            "step": "0"
        }
        data = self._post_json(payload, "sub-activities")
        if isinstance(data, dict) and "data" in data:
            return data["data"]
        return None

    def _extract_stay_and_cost_from_html(self, html_content: str) -> Dict[str, Optional[str]]:
        """
        Parses raw HTML to find and extract summary text for Stay and Cost.

        Every ``<strong>`` whose text contains "stay" or "cost" is treated as
        a heading; its value is read from the sibling that follows the
        heading's top-level ancestor. When several headings match the same
        key, the last one wins.

        Returns:
            ``{"stay": ..., "cost": ...}``. A key stays ``None`` when no
            matching heading exists and is ``"Not specified"`` when the
            heading exists but no usable text follows it. List values are
            rendered as ``"\\n- item\\n- item"``.
        """
        summary: Dict[str, Optional[str]] = {"stay": None, "cost": None}
        if not html_content:
            return summary

        soup = BeautifulSoup(html_content, 'html.parser')
        for strong_tag in soup.find_all('strong'):
            heading_text = strong_tag.get_text(strip=True).lower()
            if 'stay' in heading_text:
                key = 'stay'
            elif 'cost' in heading_text:
                key = 'cost'
            else:
                # Not a heading we summarise; skip the sibling lookup.
                continue

            # Navigate up to the top-level parent block of the heading
            parent_block = strong_tag
            while parent_block.parent and parent_block.parent.name != '[document]':
                parent_block = parent_block.parent

            # The value is in the next sibling block
            summary[key] = self._summarise_value_block(parent_block.find_next_sibling())
        return summary

    @staticmethod
    def _summarise_value_block(next_block: Any) -> str:
        """Turn the block following a heading into summary text ("Not specified" if empty)."""
        if next_block is None:
            return "Not specified"

        list_items = next_block.find_all('li')
        if list_items:
            # Clean first, then drop blanks, so an item that only said
            # "(Extendable)" does not become an empty "- " bullet.
            cleaned = (
                item.get_text(strip=True).replace('(Extendable)', '').strip()
                for item in list_items
            )
            bullets = [f"- {text}" for text in cleaned if text]
            return "\n" + "\n".join(bullets) if bullets else "Not specified"

        # Otherwise, get text from the whole block and clean it
        raw_text = next_block.get_text(separator=' ', strip=True)
        return raw_text.replace('(Extendable)', '').strip() or "Not specified"

    def fix_html_structure(self, html_content: str) -> str:
        """
        Cleans and intelligently restructures malformed HTML from the source API.
        It groups headings with their subsequent content into single <li> elements.

        Returns:
            ``""`` for empty input, otherwise the regrouped content wrapped
            in a single ``<ul>``.
        """
        if not html_content:
            return ""

        soup = BeautifulSoup(html_content, 'html.parser')
        final_list_items = []
        current_li_buffer = []

        def is_heading(element):
            """Heuristic to determine if an element is a section heading."""
            if isinstance(element, NavigableString):
                return False
            # A heading is identified by the presence of a <strong> tag.
            return element.find('strong') is not None

        def flush_buffer():
            """Processes the buffer and adds its content as a single <li>."""
            if current_li_buffer:
                content = "".join(str(el) for el in current_li_buffer)
                final_list_items.append(f"<li>{content}</li>")
                current_li_buffer.clear()

        for element in soup.contents:
            if not str(element).strip():  # Skip empty whitespace nodes
                continue
            # A new heading closes the group collected so far.
            if is_heading(element) and current_li_buffer:
                flush_buffer()
            current_li_buffer.append(element)
        flush_buffer()  # Flush the last remaining item

        rebuilt_html = "".join(final_list_items)
        # Clean up known stray tags.
        # NOTE(review): this also strips the closing </li> added above when a
        # group ends in </ol> - confirm that is the intended output.
        rebuilt_html = rebuilt_html.replace('</ol></li>', '</ol>')
        return f"<ul>{rebuilt_html}</ul>"

    def _add_stay_and_cost(self, visa: Dict[str, Any]) -> None:
        """Set ``stay_summary``/``cost_summary`` on ``visa`` from its step-2 details ("N/A" if unavailable)."""
        visa_id = visa.get("id")
        details = self.get_visa_details(visa_id) if visa_id is not None else None
        visa_info = self._first_visa_type(details)
        if visa_info is None:
            visa['stay_summary'] = "N/A"
            visa['cost_summary'] = "N/A"
            return
        summary = self._extract_stay_and_cost_from_html(visa_info.get("info", ""))
        visa['stay_summary'] = summary.get('stay')
        visa['cost_summary'] = summary.get('cost')

    def get_visa_types(self, activity_id: str, country_id: str) -> Optional[Dict]:
        """
        Step 1: Get available visa types for activity and country

        On success each visa entry is enriched with ``stay_summary`` and
        ``cost_summary``, which costs one extra step-2 request per visa.

        Returns:
            The response dict when its status is "success"; a
            ``{"status": "empty", "message": ...}`` dict when the API reports
            an empty result (as a dict or as the list form
            ``["status", "empty", false]``); ``None`` otherwise.
        """
        payload = {
            "activity_id": activity_id,
            "country_id": country_id,
            "step": "1"
        }
        data = self._post_json(payload, "visa types")

        # Handle both dict and list responses
        if isinstance(data, dict):
            status = data.get("status")
            if status == "success":
                # Enrich visa types with stay and cost summaries
                visa_list = data.get("data") or []
                for visa in visa_list:
                    if isinstance(visa, dict):
                        self._add_stay_and_cost(visa)
                data['data'] = visa_list
                return data
            if status == "empty":
                return {"status": "empty", "message": self._GUARANTOR_MESSAGE}
        elif isinstance(data, list):
            # API returns ["status","empty",false] for empty responses
            if len(data) >= 2 and data[1] == "empty":
                return {"status": "empty", "message": self._GUARANTOR_MESSAGE}
        return None

    def get_visa_details(self, visa_type_id: str) -> Optional[Dict]:
        """
        Step 2: Get detailed information for a specific visa type

        Returns:
            The ``data`` member of a "success" response, otherwise ``None``.
        """
        payload = {
            "visa_type_id": visa_type_id,
            "step": "2"
        }
        data = self._post_json(payload, "visa details")
        if isinstance(data, dict) and data.get("status") == "success":
            return data.get("data")
        return None

    def get_country_id(self, country_name: str) -> Optional[str]:
        """
        Get country ID from country name

        The lookup ignores case and surrounding whitespace. Returns ``None``
        for unknown names and for non-string input.
        """
        if not isinstance(country_name, str):
            return None
        return self.COUNTRY_MAPPING.get(country_name.strip().upper())

    def get_parent_activity_id(self, activity_name: str) -> Optional[str]:
        """
        Get parent activity ID from activity name

        An exact match is tried first; failing that, the name is compared
        ignoring case and surrounding whitespace. Returns ``None`` for
        unknown names and for non-string input.
        """
        if not isinstance(activity_name, str):
            return None
        exact = self.PARENT_ACTIVITY_MAPPING.get(activity_name)
        if exact is not None:
            return exact
        wanted = activity_name.strip().casefold()
        for name, activity_id in self.PARENT_ACTIVITY_MAPPING.items():
            if name.casefold() == wanted:
                return activity_id
        return None

    def get_full_visa_info(self, country_name: str, parent_activity_name: str,
                           sub_activity_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Get complete visa information through the entire flow

        Args:
            country_name: Country name, resolved via ``get_country_id``.
            parent_activity_name: Activity name, resolved via
                ``get_parent_activity_id``.
            sub_activity_id: When given, visa types for this sub-activity are
                fetched as well.

        Returns:
            A dict with ``success``, ``country``, ``parent_activity``,
            ``data`` and ``error``. ``success`` is True once the
            sub-activities were fetched. If the visa-type lookup then fails,
            ``success`` stays True but ``error`` is set to
            "Failed to fetch visa types" and ``visa_types`` stays empty.
        """
        result: Dict[str, Any] = {
            "success": False,
            "country": country_name,
            "parent_activity": parent_activity_name,
            "data": None,
            "error": None
        }

        # Get country ID
        country_id = self.get_country_id(country_name)
        if not country_id:
            result["error"] = f"Country '{country_name}' not found"
            return result

        # Get parent activity ID
        parent_activity_id = self.get_parent_activity_id(parent_activity_name)
        if not parent_activity_id:
            result["error"] = f"Activity '{parent_activity_name}' not found"
            return result

        # Step 0: Get sub-activities
        sub_activities = self.get_sub_activities(parent_activity_id)
        if not sub_activities:
            result["error"] = "Failed to fetch sub-activities"
            return result

        result["data"] = {
            "sub_activities": sub_activities,
            "visa_types": [],
            "selected_sub_activity": None
        }

        # If sub_activity_id provided, get visa types
        if sub_activity_id:
            visa_types_data = self.get_visa_types(sub_activity_id, country_id)
            if not visa_types_data:
                # Surface the failure instead of silently returning an empty list.
                result["error"] = "Failed to fetch visa types"
            elif visa_types_data.get("status") == "empty":
                result["data"]["message"] = visa_types_data.get("message")
            else:
                result["data"]["visa_types"] = visa_types_data.get("data", [])
                result["data"]["all_visa_info"] = visa_types_data.get("all", [])
                result["data"]["selected_sub_activity"] = sub_activity_id

        result["success"] = True
        return result

    def get_visa_full_details(self, visa_type_id: str) -> Dict[str, Any]:
        """
        Get complete details for a specific visa type

        Returns:
            A dict with ``success``, ``data`` and ``error``. On success
            ``data`` is the first ``visaType`` entry, with ``info_html`` /
            ``information_html`` added for whichever of ``info`` /
            ``information`` it contains. On failure ``error`` says whether
            the fetch failed or the response had no visa type entry.
        """
        result: Dict[str, Any] = {
            "success": False,
            "data": None,
            "error": None
        }

        details = self.get_visa_details(visa_type_id)
        if not details:
            result["error"] = "Failed to fetch visa details"
            return result

        visa_info = self._first_visa_type(details)
        if visa_info is None:
            result["error"] = "Visa details did not include any visa type information"
            return result

        # The source HTML is malformed, so we must clean and restructure it.
        if "info" in visa_info:
            visa_info["info_html"] = self.fix_html_structure(visa_info.get("info", ""))
        if "information" in visa_info:
            visa_info["information_html"] = self.fix_html_structure(visa_info.get("information", ""))

        result["data"] = visa_info
        result["success"] = True
        return result