Spaces:
Build error
Build error
Commit ·
42a8301
1
Parent(s): 3f2a181
feat: add multiple source
Browse files
app/util/visa_availability_scraper_playwright.py
CHANGED
|
@@ -4,7 +4,7 @@ from typing import Dict, Optional, List
|
|
| 4 |
from playwright.async_api import async_playwright
|
| 5 |
import pandas as pd
|
| 6 |
|
| 7 |
-
from
|
| 8 |
class PassportIndexVisaScraper:
|
| 9 |
def __init__(self, debug: bool = True):
|
| 10 |
"""
|
|
@@ -287,7 +287,44 @@ class PassportIndexVisaScraper:
|
|
| 287 |
await asyncio.sleep(delay)
|
| 288 |
|
| 289 |
return results
|
| 290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
def format_result(self, result: Dict) -> str:
|
| 292 |
"""Format a single result for display"""
|
| 293 |
if not result:
|
|
@@ -383,5 +420,52 @@ async def indo():
|
|
| 383 |
df = pd.DataFrame(l)
|
| 384 |
df.to_csv('visa_avaibility_playwright.csv', index=False)
|
| 385 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
if __name__ == "__main__":
|
| 387 |
-
asyncio.run(
|
|
|
|
| 4 |
from playwright.async_api import async_playwright
|
| 5 |
import pandas as pd
|
| 6 |
|
| 7 |
+
from constant import COUNTRY_CODES, REVERSE_COUNTRY_CODES
|
| 8 |
class PassportIndexVisaScraper:
|
| 9 |
def __init__(self, debug: bool = True):
|
| 10 |
"""
|
|
|
|
| 287 |
await asyncio.sleep(delay)
|
| 288 |
|
| 289 |
return results
|
| 290 |
+
async def check_multiple_source(self, passport_countries: List[str], destination: str, delay: float = 2.0) -> Dict[str, Optional[Dict]]:
    """
    Check visa requirements for multiple passport countries to a single destination.

    Each passport is looked up with ``check_visa_requirement_browser`` first;
    if that returns a falsy result, ``check_visa_interactive`` is tried as a
    fallback. Progress is reported on stdout.

    Args:
        passport_countries: List of two-letter country codes for passports.
        destination: Two-letter country code for the destination.
        delay: Delay between requests in seconds. No delay is applied after
            the last passport.

    Returns:
        Dictionary mapping each passport country code (exactly as given) to
        its visa information dict, or to ``None`` when both lookups failed.
    """
    results: Dict[str, Optional[Dict]] = {}
    total = len(passport_countries)

    for i, passport in enumerate(passport_countries, 1):
        print(f"\n[{i}/{total}] Checking {passport.upper()} → {destination.upper()}...")

        # Primary lookup first
        result = await self.check_visa_requirement_browser(passport, destination)

        # Fall back to the interactive method when the primary lookup fails
        if not result:
            result = await self.check_visa_interactive(passport, destination)

        if result:
            results[passport] = result
            text = result.get('text', 'No text available')
            print(f" ✅ Result: {text}")
        else:
            # Keep the key so callers can tell "failed" apart from "not requested"
            results[passport] = None
            print(" ❌ Failed to get result")

        # Rate limiting to avoid blocking; skipped after the final request
        if i < total:
            print(f" ⏳ Waiting {delay} seconds...")
            await asyncio.sleep(delay)

    return results
|
| 328 |
def format_result(self, result: Dict) -> str:
|
| 329 |
"""Format a single result for display"""
|
| 330 |
if not result:
|
|
|
|
| 420 |
df = pd.DataFrame(l)
|
| 421 |
df.to_csv('visa_avaibility_playwright.csv', index=False)
|
| 422 |
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
def _asean_visa_row(result: Dict, source: str, dest: str) -> Dict:
    """Build one CSV row for a passport (source) -> destination lookup result."""
    return {
        'text': result.get('text', 'N/A'),
        'days': result.get('days', 'N/A'),
        'pass': result.get('pass', 'N/A'),
        'dest': REVERSE_COUNTRY_CODES[dest],
        'source': REVERSE_COUNTRY_CODES[source],
    }


async def asean():
    """
    Scrape visa requirements for a fixed list of ASEAN country codes.

    For every code in ``mains`` two passes are made against all values of
    ``COUNTRY_CODES``: one with the code as the passport (outbound) and one
    with the code as the destination (inbound). Successful lookups are
    collected as rows and written to ``asean_visa_avaibility_playwright.csv``.
    Nothing is written when the scraper session fails to initialize.
    """
    print("="*60)
    print(" Passport Index Visa Checker (Playwright)")
    print("="*60)

    async with PassportIndexVisaScraper(debug=True) as scraper:
        rows = []
        if not await scraper.initialize_session():
            print("❌ Failed to initialize session")
            return
        mains = ['kh', 'th', 'vn', 'la', 'mm', 'tl', 'ph', 'bn', 'my']
        destinations = list(COUNTRY_CODES.values())
        # NOTE(review): pairs where both codes are in `mains` appear to be
        # fetched in both passes and would yield duplicate rows - confirm
        # whether de-duplication is wanted.
        for main in mains:
            # Outbound: `main` is the passport, every known code is a destination
            outbound = await scraper.check_multiple_destinations(main, destinations, delay=2.0)
            for dest, result in outbound.items():
                if result:
                    rows.append(_asean_visa_row(result, source=main, dest=dest))
                    print(f"{main.upper()} -> {dest.upper()}: {result.get('text', 'N/A')}")
                else:
                    print(f" ❌ {main.upper()} → {dest.upper()}: Failed")

            # Inbound: every known code is a passport, `main` is the destination
            inbound = await scraper.check_multiple_source(destinations, main, delay=2.0)
            for passport, result in inbound.items():
                if result:
                    rows.append(_asean_visa_row(result, source=passport, dest=main))
                    print(f"{passport.upper()} -> {main.upper()}: {result.get('text', 'N/A')}")
                else:
                    print(f" ❌ {passport.upper()} → {main.upper()}: Failed")

        # save to csv
        df = pd.DataFrame(rows)
        df.to_csv('asean_visa_avaibility_playwright.csv', index=False)
|
| 469 |
+
|
| 470 |
if __name__ == "__main__":
|
| 471 |
+
asyncio.run(asean())
|
server.py
CHANGED
|
@@ -8,7 +8,7 @@ import json
|
|
| 8 |
|
| 9 |
from app.util.gen_ai_base import GenAIBaseClient
|
| 10 |
from app.util.browser_agent import BrowserAgent
|
| 11 |
-
from app.util.visa_availability_scraper_playwright import PassportIndexVisaScraper
|
| 12 |
import sys
|
| 13 |
sys.stdout.reconfigure(line_buffering=True)
|
| 14 |
|
|
|
|
| 8 |
|
| 9 |
from app.util.gen_ai_base import GenAIBaseClient
|
| 10 |
from app.util.browser_agent import BrowserAgent
|
| 11 |
+
# from app.util.visa_availability_scraper_playwright import PassportIndexVisaScraper
|
| 12 |
import sys
|
| 13 |
sys.stdout.reconfigure(line_buffering=True)
|
| 14 |
|