mrfirdauss committed on
Commit
0093c99
·
1 Parent(s): 4904b99
app/util/visa_availability_scraper_playwright.py ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ from typing import Dict, Optional, List
4
+ from playwright.async_api import async_playwright
5
+
6
class PassportIndexVisaScraper:
    """Query passportindex.org visa requirements through a Playwright browser.

    Use as an async context manager: entering launches a headless Chromium
    page, exiting releases it. Visa lookups are done either by POSTing to
    the site's checker endpoint from inside the page
    (``check_visa_requirement_browser``) or by driving the on-page country
    pickers (``check_visa_interactive``).
    """

    # Value sent as the ``cl`` form field when the page has no ``#cl`` input.
    DEFAULT_SESSION_ID = 'bc2140a2d83928ce1112d01e610bad89'

    def __init__(self, debug: bool = True):
        """
        Initialize the Passport Index visa scraper using Playwright

        Args:
            debug: Enable debug output
        """
        self.base_url = "https://www.passportindex.org/travel-visa-checker/"
        self.api_url = "https://www.passportindex.org/core/visachecker.php"
        self.debug = debug
        # All browser handles start as None so cleanup is safe even when the
        # context manager was never entered.
        self.playwright = None
        self.browser = None
        self.context = None
        self.page = None

    async def __aenter__(self):
        """Launch the browser, open a page and install the stealth script.

        Returns:
            This scraper instance.

        Raises:
            Exception: Whatever Playwright raises during start-up; any
                resources already acquired are released before re-raising.
        """
        self.playwright = await async_playwright().start()

        try:
            self.browser = await self.playwright.chromium.launch(
                headless=True,
                args=[
                    '--disable-blink-features=AutomationControlled',
                    '--disable-dev-shm-usage',
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-web-security',
                    '--disable-features=IsolateOrigins,site-per-process',
                ],
            )

            # Create context with realistic settings
            self.context = await self.browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36',
                locale='en-US',
                timezone_id='America/New_York',
            )

            self.page = await self.context.new_page()

            await self.page.add_init_script("""
                // Override the navigator.webdriver property
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                });

                // Override chrome property
                window.chrome = {
                    runtime: {}
                };

                // Override permissions. The native method must stay bound to
                // its owner, otherwise calling it throws "Illegal invocation".
                const originalQuery = window.navigator.permissions.query.bind(
                    window.navigator.permissions
                );
                window.navigator.permissions.query = (parameters) => (
                    parameters.name === 'notifications' ?
                        Promise.resolve({ state: Notification.permission }) :
                        originalQuery(parameters)
                );
            """)
        except BaseException:
            # __aexit__ is not called when __aenter__ raises, so release
            # whatever was acquired before propagating the failure.
            await self._close()
            raise

        if self.debug:
            print("🚀 Browser initialized with stealth mode")

        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Clean up browser resources"""
        await self._close()

        if self.debug:
            print("🔒 Browser closed")

    async def _close(self) -> None:
        """Release page, context, browser and driver, each independently."""
        closers = (
            ('page', 'close'),
            ('context', 'close'),
            ('browser', 'close'),
            ('playwright', 'stop'),
        )
        for attr, method in closers:
            resource = getattr(self, attr, None)
            if not resource:
                continue
            try:
                await getattr(resource, method)()
            except Exception as e:
                # Keep going: one failed close must not leak the rest.
                print(f"⚠️ Failed to release {attr}: {e}")
            finally:
                setattr(self, attr, None)

    @staticmethod
    def _normalize_code(code: str) -> str:
        """Return *code* stripped and lower-cased.

        Raises:
            ValueError: If the code is empty or contains anything other than
                ASCII letters/digits, so it is safe inside a CSS selector.
        """
        normalized = code.strip().lower()
        if not (normalized.isascii() and normalized.isalnum()):
            raise ValueError(f"Invalid country code: {code!r}")
        return normalized

    async def _get_session_id(self) -> str:
        """Read the ``#cl`` input value from the page, or the default id."""
        return await self.page.evaluate(
            """
            (fallback) => {
                const clInput = document.querySelector('#cl');
                return clInput ? clInput.value : fallback;
            }
            """,
            self.DEFAULT_SESSION_ID,
        )

    async def initialize_session(self) -> bool:
        """
        Navigate to the website and wait for it to load properly

        Returns:
            False when no browser page exists (context manager not entered);
            True otherwise. Page-load problems are reported in debug mode
            but deliberately do not fail the session.
        """
        if self.page is None:
            print("❌ Error initializing session: browser page is not available")
            return False

        if self.debug:
            print("📱 Initializing session...")

        try:
            await self.page.goto(
                self.base_url,
                wait_until='domcontentloaded',
                timeout=30000,
            )
            await self.page.wait_for_timeout(3000)

            cl_value = await self._get_session_id()
            if self.debug:
                print(f"✅ Page loaded, session ID: {cl_value}")
        except Exception as e:
            # Best effort: later lookups are still attempted.
            if self.debug:
                print(f"⚠️ Page load issue: {e}, continuing anyway...")

        return True

    async def check_visa_requirement_browser(self, passport_country: str, destination_country: str) -> Optional[Dict]:
        """
        Check visa requirements using browser automation

        Args:
            passport_country: Two-letter country code for passport
            destination_country: Two-letter country code for destination

        Returns:
            Dictionary with visa information or None if failed
        """
        try:
            passport = self._normalize_code(passport_country)
            destination = self._normalize_code(destination_country)

            if self.debug:
                print(f"🌐 Checking {passport.upper()} → {destination.upper()}")

            session_id = await self._get_session_id()

            # The POST is issued from inside the page; all values are passed
            # as evaluate() arguments rather than spliced into the script.
            result = await self.page.evaluate(
                """
                async ([apiUrl, passport, destination, sessionId]) => {
                    const formData = new URLSearchParams();
                    formData.append('d', destination);
                    formData.append('s', passport);
                    formData.append('cl', sessionId);

                    try {
                        const response = await fetch(apiUrl, {
                            method: 'POST',
                            headers: {
                                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                                'X-Requested-With': 'XMLHttpRequest',
                                'Accept': 'application/json, text/javascript, */*; q=0.01'
                            },
                            body: formData.toString(),
                            credentials: 'include'
                        });

                        if (!response.ok) {
                            throw new Error(`HTTP ${response.status}`);
                        }

                        return await response.json();
                    } catch (error) {
                        return { error: error.message };
                    }
                }
                """,
                [self.api_url, passport, destination, session_id],
            )

            # Callers use dict methods on the result, so anything that is not
            # a non-empty dict counts as a failure.
            if not isinstance(result, dict) or not result:
                return None

            if 'error' in result:
                print(f"❌ API Error: {result['error']}")
                return None

            if self.debug:
                print(f"✅ Got result: {result}")
            return result

        except Exception as e:
            print(f"❌ Error checking visa requirement: {e}")
            return None

    async def check_visa_interactive(self, passport_country: str, destination_country: str) -> Optional[Dict]:
        """
        Alternative method: Use the interactive UI to check visa requirements

        Args:
            passport_country: Two-letter country code for passport
            destination_country: Two-letter country code for destination

        Returns:
            Dictionary with ``text``, ``days``, ``pass`` and ``dest`` keys,
            or None if the result element is missing or any step failed
        """
        try:
            # Validated codes are safe to embed in the CSS selectors below.
            passport = self._normalize_code(passport_country)
            destination = self._normalize_code(destination_country)

            if self.debug:
                print(f"🖱️ Using interactive method for {passport.upper()} → {destination.upper()}")

            # Click on the passport selector
            await self.page.click('.vch-select-pass')
            await self.page.wait_for_timeout(500)

            # Find and click the country in the list
            passport_selector = f'.vch-passports .s-div[data-ccode="{passport}"]'
            await self.page.wait_for_selector(passport_selector, timeout=5000)
            await self.page.click(passport_selector)
            await self.page.wait_for_timeout(500)

            # Click on the destination selector
            await self.page.click('.vch-select-des')
            await self.page.wait_for_timeout(500)

            # Find and click the destination country
            dest_selector = f'.vch-destinations .s-div[data-ccode="{destination}"]'
            await self.page.wait_for_selector(dest_selector, timeout=5000)
            await self.page.click(dest_selector)
            await self.page.wait_for_timeout(1000)

            # Read the result; the codes travel as arguments instead of being
            # concatenated into the script source.
            return await self.page.evaluate(
                """
                ([passport, destination]) => {
                    const resultElement = document.querySelector('.vch-result');
                    if (resultElement) {
                        const text = resultElement.querySelector('.text');
                        const days = resultElement.querySelector('.days');
                        return {
                            text: text ? text.textContent : '',
                            days: days ? days.textContent : '',
                            pass: passport,
                            dest: destination
                        };
                    }
                    return null;
                }
                """,
                [passport, destination.upper()],
            )

        except Exception as e:
            if self.debug:
                print(f"❌ Interactive method failed: {e}")
            return None

    async def check_multiple_destinations(self, passport_country: str, destinations: List[str], delay: float = 2.0) -> Dict:
        """
        Check visa requirements for multiple destinations

        Args:
            passport_country: Two-letter country code for passport
            destinations: List of two-letter country codes for destinations
            delay: Delay between requests in seconds

        Returns:
            Dictionary mapping destination codes to visa information
            (None for destinations where both methods failed)
        """
        results = {}
        total = len(destinations)

        for i, dest in enumerate(destinations, 1):
            print(f"\n[{i}/{total}] Checking {passport_country.upper()} → {dest.upper()}...")

            # Try API method first
            result = await self.check_visa_requirement_browser(passport_country, dest)

            # If API fails, try interactive method
            if not result:
                result = await self.check_visa_interactive(passport_country, dest)

            if result:
                results[dest] = result
                text = result.get('text', 'No text available')
                print(f" ✅ Result: {text}")
            else:
                results[dest] = None
                print(" ❌ Failed to get result")

            # Rate limiting (no wait after the last destination)
            if i < total:
                print(f" ⏳ Waiting {delay} seconds...")
                await asyncio.sleep(delay)

        return results

    def format_result(self, result: Dict) -> str:
        """Format a single result for display

        Args:
            result: Visa information dictionary; the ``pass``, ``dest`` and
                ``text`` keys are read

        Returns:
            ``"<PASS> → <DEST>: <text>"``, with ``N/A`` for any missing or
            None field, or a fixed message when *result* is empty or None
        """
        if not result:
            return "No information available"

        def field(key: str) -> str:
            # Coerce to str so None or non-string values cannot crash
            # the .upper() calls below.
            value = result.get(key)
            return 'N/A' if value is None else str(value)

        return f"{field('pass').upper()} → {field('dest').upper()}: {field('text')}"
297
+
298
+
299
async def main(
    passport_country: str = 'us',
    single_destination: str = 'gb',
    destinations: Optional[List[str]] = None,
):
    """Main function to demonstrate usage

    Runs one single-destination check followed by a batch check and prints
    the outcome of each.

    Args:
        passport_country: Two-letter country code of the passport to test
        single_destination: Two-letter destination code for the single check
        destinations: Destination codes for the batch check; defaults to
            Canada, Mexico, Japan and Australia
    """
    if destinations is None:
        destinations = ['ca', 'mx', 'jp', 'au']  # Canada, Mexico, Japan, Australia

    passport_label = passport_country.upper()
    single_label = single_destination.upper()

    print("="*60)
    print(" Passport Index Visa Checker (Playwright)")
    print("="*60)

    async with PassportIndexVisaScraper(debug=True) as scraper:
        # Initialize session
        if not await scraper.initialize_session():
            print("❌ Failed to initialize session")
            return

        print("\n" + "="*60)
        print(" Testing visa requirements...")
        print("="*60)

        # Test single visa requirement
        print(f"\n📍 Single visa check: {passport_label} → {single_label}")
        print("-" * 40)
        result = await scraper.check_visa_requirement_browser(passport_country, single_destination)
        if not result:
            print("Trying interactive method...")
            result = await scraper.check_visa_interactive(passport_country, single_destination)

        if result:
            print(f"Result: {scraper.format_result(result)}")
        else:
            # Report the failure instead of ending the section silently.
            print(f" ❌ {passport_label} → {single_label}: Failed")

        # Test multiple destinations
        print(f"\n📍 Multiple destinations for {passport_label} passport:")
        print("-" * 40)
        results = await scraper.check_multiple_destinations(passport_country, destinations, delay=2.0)

        print("\n📊 Summary:")
        for dest, result in results.items():
            if result:
                print(f" ✅ {scraper.format_result(result)}")
            else:
                print(f" ❌ {passport_label} → {dest.upper()}: Failed")
339
+
340
+
341
# Two-letter lower-case country code -> English display name.
# Existing names are kept as they were so lookups by value keep working.
COUNTRY_CODES: Dict[str, str] = {
    'af': 'Afghanistan', 'al': 'Albania', 'dz': 'Algeria', 'ad': 'Andorra',
    'ao': 'Angola', 'ag': 'Antigua and Barbuda', 'ar': 'Argentina', 'am': 'Armenia',
    'au': 'Australia', 'at': 'Austria', 'az': 'Azerbaijan', 'bs': 'Bahamas',
    'bh': 'Bahrain', 'bd': 'Bangladesh', 'bb': 'Barbados', 'by': 'Belarus',
    'be': 'Belgium', 'bz': 'Belize', 'bj': 'Benin', 'bt': 'Bhutan',
    'bo': 'Bolivia', 'ba': 'Bosnia and Herzegovina', 'bw': 'Botswana', 'br': 'Brazil',
    'bn': 'Brunei', 'bg': 'Bulgaria', 'bf': 'Burkina Faso', 'bi': 'Burundi',
    'kh': 'Cambodia', 'cm': 'Cameroon', 'ca': 'Canada', 'cv': 'Cape Verde',
    'cf': 'Central African Republic', 'td': 'Chad', 'cl': 'Chile', 'cn': 'China',
    'co': 'Colombia', 'km': 'Comoros', 'cg': 'Congo', 'cr': 'Costa Rica',
    'hr': 'Croatia', 'cu': 'Cuba', 'cy': 'Cyprus', 'cz': 'Czech Republic',
    'dk': 'Denmark', 'dj': 'Djibouti', 'dm': 'Dominica', 'do': 'Dominican Republic',
    'ec': 'Ecuador', 'eg': 'Egypt', 'sv': 'El Salvador', 'gq': 'Equatorial Guinea',
    'er': 'Eritrea', 'ee': 'Estonia', 'et': 'Ethiopia', 'fj': 'Fiji',
    'fi': 'Finland', 'fr': 'France', 'ga': 'Gabon', 'gm': 'Gambia',
    'ge': 'Georgia', 'de': 'Germany', 'gh': 'Ghana', 'gr': 'Greece',
    'gd': 'Grenada', 'gt': 'Guatemala', 'gn': 'Guinea', 'gw': 'Guinea-Bissau',
    'gy': 'Guyana', 'ht': 'Haiti', 'hn': 'Honduras', 'hu': 'Hungary',
    'is': 'Iceland', 'in': 'India', 'id': 'Indonesia', 'ir': 'Iran',
    'iq': 'Iraq', 'ie': 'Ireland', 'il': 'Israel', 'it': 'Italy',
    'jm': 'Jamaica', 'jp': 'Japan', 'jo': 'Jordan', 'kz': 'Kazakhstan',
    'ke': 'Kenya', 'ki': 'Kiribati', 'kp': 'North Korea', 'kr': 'South Korea',
    'kw': 'Kuwait', 'kg': 'Kyrgyzstan', 'la': 'Laos', 'lv': 'Latvia',
    'lb': 'Lebanon', 'ls': 'Lesotho', 'lr': 'Liberia', 'ly': 'Libya',
    'li': 'Liechtenstein', 'lt': 'Lithuania', 'lu': 'Luxembourg', 'mk': 'Macedonia',
    'mg': 'Madagascar', 'mw': 'Malawi', 'my': 'Malaysia', 'mv': 'Maldives',
    'ml': 'Mali', 'mt': 'Malta', 'mh': 'Marshall Islands', 'mr': 'Mauritania',
    'mu': 'Mauritius', 'mx': 'Mexico', 'fm': 'Micronesia', 'md': 'Moldova',
    'mc': 'Monaco', 'mn': 'Mongolia', 'me': 'Montenegro', 'ma': 'Morocco',
    'mz': 'Mozambique', 'mm': 'Myanmar', 'na': 'Namibia', 'nr': 'Nauru',
    'np': 'Nepal', 'nl': 'Netherlands', 'nz': 'New Zealand', 'ni': 'Nicaragua',
    'ne': 'Niger', 'ng': 'Nigeria', 'no': 'Norway', 'om': 'Oman',
    'pk': 'Pakistan', 'pw': 'Palau', 'pa': 'Panama', 'pg': 'Papua New Guinea',
    'py': 'Paraguay', 'pe': 'Peru', 'ph': 'Philippines', 'pl': 'Poland',
    'pt': 'Portugal', 'qa': 'Qatar', 'ro': 'Romania', 'ru': 'Russia',
    'rw': 'Rwanda', 'kn': 'Saint Kitts and Nevis', 'lc': 'Saint Lucia',
    'vc': 'Saint Vincent and the Grenadines', 'ws': 'Samoa', 'sm': 'San Marino',
    'st': 'Sao Tome and Principe', 'sa': 'Saudi Arabia', 'sn': 'Senegal',
    'rs': 'Serbia', 'sc': 'Seychelles', 'sl': 'Sierra Leone', 'sg': 'Singapore',
    'sk': 'Slovakia', 'si': 'Slovenia', 'sb': 'Solomon Islands', 'so': 'Somalia',
    'za': 'South Africa', 'es': 'Spain', 'lk': 'Sri Lanka', 'sd': 'Sudan',
    'sr': 'Suriname', 'sz': 'Swaziland', 'se': 'Sweden', 'ch': 'Switzerland',
    'sy': 'Syria', 'tw': 'Taiwan', 'tj': 'Tajikistan', 'tz': 'Tanzania',
    'th': 'Thailand', 'tl': 'Timor-Leste', 'tg': 'Togo', 'to': 'Tonga',
    'tt': 'Trinidad and Tobago', 'tn': 'Tunisia', 'tr': 'Turkey', 'tm': 'Turkmenistan',
    'tv': 'Tuvalu', 'ug': 'Uganda', 'ua': 'Ukraine', 'ae': 'United Arab Emirates',
    'gb': 'United Kingdom', 'us': 'United States', 'uy': 'Uruguay', 'uz': 'Uzbekistan',
    'vu': 'Vanuatu', 've': 'Venezuela', 'vn': 'Vietnam', 'ye': 'Yemen',
    'zm': 'Zambia', 'zw': 'Zimbabwe',
    # UN member states that were missing from the table.
    'cd': 'Democratic Republic of the Congo', 'ci': "Côte d'Ivoire", 'ss': 'South Sudan',
    # Further two-letter codes, in the same non-UN category as Taiwan above.
    'hk': 'Hong Kong', 'mo': 'Macao', 'xk': 'Kosovo', 'ps': 'Palestine',
    'va': 'Vatican City',
}
392
+
393
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())