mrfirdauss committed on
Commit
42a8301
·
1 Parent(s): 3f2a181

feat: add multiple source

Browse files
app/util/visa_availability_scraper_playwright.py CHANGED
@@ -4,7 +4,7 @@ from typing import Dict, Optional, List
4
  from playwright.async_api import async_playwright
5
  import pandas as pd
6
 
7
- from app.util.constant import COUNTRY_CODES, REVERSE_COUNTRY_CODES
8
  class PassportIndexVisaScraper:
9
  def __init__(self, debug: bool = True):
10
  """
@@ -287,7 +287,44 @@ class PassportIndexVisaScraper:
287
  await asyncio.sleep(delay)
288
 
289
  return results
290
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  def format_result(self, result: Dict) -> str:
292
  """Format a single result for display"""
293
  if not result:
@@ -383,5 +420,52 @@ async def indo():
383
  df = pd.DataFrame(l)
384
  df.to_csv('visa_avaibility_playwright.csv', index=False)
385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  if __name__ == "__main__":
387
- asyncio.run(main())
 
4
  from playwright.async_api import async_playwright
5
  import pandas as pd
6
 
7
+ from constant import COUNTRY_CODES, REVERSE_COUNTRY_CODES
8
  class PassportIndexVisaScraper:
9
  def __init__(self, debug: bool = True):
10
  """
 
287
  await asyncio.sleep(delay)
288
 
289
  return results
290
+ async def check_multiple_source(self, passport_countries: List[str], destination: str, delay: float = 2.0) -> Dict:
291
+ """
292
+ Check visa requirements for multiple passport countries to a single destination.
293
+
294
+ Args:
295
+ passport_countries: List of two-letter country codes for passports.
296
+ destination: Two-letter country code for the destination.
297
+ delay: Delay between requests in seconds.
298
+
299
+ Returns:
300
+ Dictionary mapping passport country codes to visa information.
301
+ """
302
+ results = {}
303
+
304
+ for i, passport in enumerate(passport_countries, 1):
305
+ print(f"\n[{i}/{len(passport_countries)}] Checking {passport.upper()} → {destination.upper()}...")
306
+
307
+ # Try API method first
308
+ result = await self.check_visa_requirement_browser(passport, destination)
309
+
310
+ # If API fails, try interactive method
311
+ if not result:
312
+ result = await self.check_visa_interactive(passport, destination)
313
+
314
+ if result:
315
+ results[passport] = result
316
+ text = result.get('text', 'No text available')
317
+ print(f" ✅ Result: {text}")
318
+ else:
319
+ results[passport] = None
320
+ print(f" ❌ Failed to get result")
321
+
322
+ # Rate limiting to avoid blocking
323
+ if i < len(passport_countries):
324
+ print(f" ⏳ Waiting {delay} seconds...")
325
+ await asyncio.sleep(delay)
326
+
327
+ return results
328
  def format_result(self, result: Dict) -> str:
329
  """Format a single result for display"""
330
  if not result:
 
420
  df = pd.DataFrame(l)
421
  df.to_csv('visa_avaibility_playwright.csv', index=False)
422
 
423
+
424
+
425
+
426
+ async def asean():
427
+ print("="*60)
428
+ print(" Passport Index Visa Checker (Playwright)")
429
+ print("="*60)
430
+
431
+ async with PassportIndexVisaScraper(debug=True) as scraper:
432
+ l = []
433
+ if not await scraper.initialize_session():
434
+ print("❌ Failed to initialize session")
435
+ return
436
+ mains = ['kh', 'th', 'vn', 'la', 'mm', 'tl', 'ph', 'bn', 'my']
437
+ destinations = list(COUNTRY_CODES.values())
438
+ for main in mains:
439
+ results = await scraper.check_multiple_destinations(main, destinations, delay=2.0)
440
+ for dest, result in results.items():
441
+ if result:
442
+ dic = {}
443
+ dic['text'] = result.get('text', 'N/A')
444
+ dic['days'] = result.get('days', 'N/A')
445
+ dic['pass'] = result.get('pass', 'N/A')
446
+ dic['dest'] = REVERSE_COUNTRY_CODES[dest]
447
+ dic['source'] = REVERSE_COUNTRY_CODES[main]
448
+ l.append(dic)
449
+ print(f"{main.upper()} -> {dest.upper()}: {result.get('text', 'N/A')}")
450
+ else:
451
+ print(f" ❌ {main.upper()} → {dest.upper()}: Failed")
452
+ results = await scraper.check_multiple_source(destinations, main, delay=2.0)
453
+ for passport, result in results.items():
454
+ if result:
455
+ dic = {}
456
+ dic['text'] = result.get('text', 'N/A')
457
+ dic['days'] = result.get('days', 'N/A')
458
+ dic['pass'] = result.get('pass', 'N/A')
459
+ dic['dest'] = REVERSE_COUNTRY_CODES[main]
460
+ dic['source'] = REVERSE_COUNTRY_CODES[passport]
461
+ l.append(dic)
462
+ print(f"{passport.upper()} -> {main.upper()}: {result.get('text', 'N/A')}")
463
+ else:
464
+ print(f" ❌ {passport.upper()} → {main.upper()}: Failed")
465
+
466
+ # save to csv
467
+ df = pd.DataFrame(l)
468
+ df.to_csv('asean_visa_avaibility_playwright.csv', index=False)
469
+
470
  if __name__ == "__main__":
471
+ asyncio.run(asean())
server.py CHANGED
@@ -8,7 +8,7 @@ import json
8
 
9
  from app.util.gen_ai_base import GenAIBaseClient
10
  from app.util.browser_agent import BrowserAgent
11
- from app.util.visa_availability_scraper_playwright import PassportIndexVisaScraper
12
  import sys
13
  sys.stdout.reconfigure(line_buffering=True)
14
 
 
8
 
9
  from app.util.gen_ai_base import GenAIBaseClient
10
  from app.util.browser_agent import BrowserAgent
11
+ # from app.util.visa_availability_scraper_playwright import PassportIndexVisaScraper
12
  import sys
13
  sys.stdout.reconfigure(line_buffering=True)
14