| |
| """ |
| Configuration file for Scira Reverse API |
| Centralized settings for rate limiting, proxies, and bypass strategies |
| Supports environment variables for Hugging Face deployment |
| """ |
| import os |
| from typing import List, Dict, Any |
|
|
| |
def get_proxy_configs() -> List[Dict[str, Any]]:
    """Build the proxy configuration list from environment variables.

    Proxies are opt-in: unless ``PROXY_ENABLED`` equals ``"true"``
    (compared case-insensitively) the result is an empty list.  When enabled,
    one residential proxy entry is assembled from ``PROXY_HOST``,
    ``PROXY_PORT``, ``PROXY_USERNAME`` and ``PROXY_PASSWORD``, each falling
    back to a built-in default when unset.

    Returns:
        A new list holding zero or one proxy dictionaries with the keys
        ``name``, ``host``, ``port``, ``username``, ``password``, ``type``
        and ``enabled``.

    Raises:
        ValueError: If ``PROXY_PORT`` is set to something ``int()`` rejects.
    """
    # Guard clause: anything other than "true" leaves proxying switched off.
    if os.getenv("PROXY_ENABLED", "false").lower() != "true":
        return []

    # NOTE(review): the fallback host and credentials below are committed to
    # source control -- consider rotating them and supplying them only through
    # the environment.
    primary = {
        "name": "Primary Residential Proxy",
        "host": os.getenv("PROXY_HOST", "6b3b12f886551090.ika.na.pyproxy.io"),
        "port": int(os.getenv("PROXY_PORT", "16666")),
        "username": os.getenv("PROXY_USERNAME", "Y21wFw2-zone-resi"),
        "password": os.getenv("PROXY_PASSWORD", "Y21wFw2"),
        "type": "residential",
        "enabled": True,
    }
    return [primary]


# Evaluated once at import time; later environment changes are not picked up.
PROXY_CONFIGS = get_proxy_configs()
|
|
| |
# Request pacing and retry policy.  Every value is read from the environment
# once, at import time; the int()/float() casts raise ValueError if an
# override is malformed.
RATE_LIMITING = {
    # Lower/upper bound of the delay between requests
    # (units are not stated here -- presumably seconds; confirm with the consumer).
    "min_delay": float(os.environ.get("MIN_DELAY", "0.5")),
    "max_delay": float(os.environ.get("MAX_DELAY", "3.0")),
    "request_timeout": int(os.environ.get("REQUEST_TIMEOUT", "120")),
    "max_retries": int(os.environ.get("MAX_RETRIES", "3")),
    "backoff_factor": float(os.environ.get("BACKOFF_FACTOR", "2.0")),
    "burst_protection": {
        # Only the literal "true" (any letter case) switches this on.
        "enabled": "true" == os.environ.get("BURST_PROTECTION", "true").lower(),
        "max_requests_per_minute": int(
            os.environ.get("MAX_REQUESTS_PER_MINUTE", "20")
        ),
        "cooldown_period": int(os.environ.get("COOLDOWN_PERIOD", "60")),
    },
}
|
|
| |
# Pool of desktop browser User-Agent strings, grouped by browser and OS.
# The order of the groups (and of the entries within each) is preserved.
USER_AGENTS = (
    [  # Chrome on Windows
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
    ]
    + [  # Firefox on Windows
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0",
    ]
    + [  # Chrome on macOS
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
    ]
    + [  # Safari on macOS
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15",
    ]
    + [  # Chrome on Linux
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
    ]
    + [  # Edge on Windows
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36 Edg/139.0.0.0",
    ]
)
|
|
| |
# Candidate client-hint brand lists (presumably sent as the Sec-CH-UA header;
# the consumer is outside this file section).
# NOTE(review): two entries advertise Chrome 120 / 122 while USER_AGENTS only
# lists Chrome 137-139 -- confirm that mixed versions are acceptable.
SEC_CH_UA_OPTIONS = [
    '"Not;A=Brand";v="99", "Google Chrome";v="139", "Chromium";v="139"',
    '"Not;A=Brand";v="99", "Google Chrome";v="138", "Chromium";v="138"',
    '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    '"Microsoft Edge";v="139", "Not;A=Brand";v="99", "Chromium";v="139"',
    '"Google Chrome";v="139", "Not;A=Brand";v="99", "Chromium";v="139"',
]

# Platform names; each value carries its own embedded double quotes.
PLATFORMS = ['"Windows"', '"macOS"', '"Linux"']

# Accept-Language candidates, all English-first.
ACCEPT_LANGUAGES = [
    "en-US,en;q=0.9",
    "en-US,en;q=0.8,es;q=0.7",
    "en-GB,en;q=0.9,en-US;q=0.8",
    "en-US,en;q=0.9,fr;q=0.8",
    "en-US,en;q=0.7",
]
|
|
| |
def get_session_tokens() -> List[str]:
    """Return the session token list, preferring the environment.

    Returns:
        A one-element list: the value of ``SCIRA_SESSION_TOKEN`` when that
        variable is set to a non-empty string, otherwise the built-in
        fallback token.
    """
    override = os.getenv("SCIRA_SESSION_TOKEN")
    # NOTE(review): this fallback token is committed to source control --
    # consider supplying it only through the environment.
    fallback = "QZkIjAJihMAA2Eju4uV54BpqwOFScwUl.Ptbu8Y%2FO%2BU4%2BimLLh6unpkg%2FMGdvFThciJvInvNAV8Y%3D"
    return [override] if override else [fallback]
|
|
def get_posthog_tokens() -> List[str]:
    """Return the PostHog token list, preferring the environment.

    Returns:
        A one-element list: the value of ``SCIRA_POSTHOG_TOKEN`` when that
        variable is set to a non-empty string, otherwise the built-in
        URL-encoded fallback value.
    """
    # NOTE(review): the fallback embeds a concrete distinct_id / session id
    # captured from a real browser session and committed to source control.
    fallback = "%7B%22distinct_id%22%3A%2201970149-15a6-74b8-8abd-93a5932c0b14%22%2C%22%24sesid%22%3A%5B1754648371696%2C%220198891e-aee2-7b9f-962c-305a2214e83c%22%2C1754647146210%5D%2C%22%24epp%22%3Atrue%2C%22%24initial_person_info%22%3A%7B%22r%22%3A%22https%3A%2F%2Fscira.ai%2F%22%2C%22u%22%3A%22https%3A%2F%2Fscira.ai%2F%22%7D%7D"
    # An unset or empty variable is falsy, so ``or`` selects the fallback.
    return [os.getenv("SCIRA_POSTHOG_TOKEN") or fallback]
|
|
# Both token lists are resolved once, at import time; changing the
# environment afterwards has no effect on these module-level values.
SESSION_TOKENS: List[str] = get_session_tokens()
POSTHOG_TOKENS: List[str] = get_posthog_tokens()
|
|
| |
# Static feature switches, one sub-dictionary per strategy.  None of these
# values are read from the environment.
BYPASS_CONFIG = {
    # Proxy switching cadence and whether a direct connection is allowed as fallback.
    "proxy_rotation": {
        "enabled": True,
        "rotate_every_n_requests": 10,
        "fallback_to_direct": True,
    },
    # Which request headers are varied between requests.
    "header_spoofing": {
        "enabled": True,
        "rotate_user_agent": True,
        "rotate_sec_ch_ua": True,
        "rotate_platform": True,
        "rotate_accept_language": True,
        "add_random_headers": True,
    },
    # Which client-IP style headers are added.
    "ip_spoofing": {
        "enabled": True,
        "x_forwarded_for": True,
        "x_real_ip": True,
        "x_originating_ip": True,
    },
    # Session rotation is the only strategy that is off by default.
    "session_rotation": {
        "enabled": False,
        "rotate_every_n_requests": 5,
    },
    "request_timing": {
        "randomize_delays": True,
        "human_like_patterns": True,
        "avoid_burst_detection": True,
    },
}
|
|
| |
# Static logging preferences.  "format" uses %-style placeholders of the kind
# the standard ``logging`` module accepts; how the flags are consumed is
# outside this file section.
LOGGING_CONFIG = {
    "level": "INFO",
    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    # Per-topic switches; response logging is the only one off by default.
    "log_requests": True,
    "log_responses": False,
    "log_proxy_usage": True,
    "log_rate_limiting": True,
}
|
|
| |
# Fixed upstream endpoint settings.
# NOTE(review): "timeout" and "max_retries" repeat the RATE_LIMITING defaults
# but are not driven by REQUEST_TIMEOUT / MAX_RETRIES -- confirm which of the
# two the request code actually reads.
SCIRA_CONFIG = {
    "base_url": "https://scira.ai/api",
    "search_endpoint": "/search",
    "timeout": 120,
    "max_retries": 3,
}
|
|
| |
# HTTP server settings, read from the environment once at import time.
# The int() casts raise ValueError if an override is not an integer.
SERVER_CONFIG = {
    "host": os.getenv("HOST", "0.0.0.0"),
    "port": int(os.getenv("PORT", "7860")),
    # Only the literal "true" (any letter case) enables these flags.
    "debug": os.getenv("DEBUG", "false").lower() == "true",
    "cors_enabled": os.getenv("CORS_ENABLED", "true").lower() == "true",
    # Comma-separated list.  Entries are trimmed and blank ones dropped, so
    # "https://a, https://b," yields two clean origins rather than an origin
    # with a leading space plus an empty string, neither of which could ever
    # match a real Origin header.
    "cors_origins": [
        origin.strip()
        for origin in os.getenv("CORS_ORIGINS", "*").split(",")
        if origin.strip()
    ],
    # Default is 10 MiB expressed in bytes.
    "max_request_size": int(os.getenv("MAX_REQUEST_SIZE", str(10 * 1024 * 1024))),
    "keepalive_timeout": int(os.getenv("KEEPALIVE_TIMEOUT", "65")),
}
|
|
def get_active_proxies():
    """Return the entries of ``PROXY_CONFIGS`` whose ``"enabled"`` value is truthy.

    An entry without an ``"enabled"`` key counts as disabled.  A new list is
    returned on every call; the entries themselves are not copied.
    """
    return list(filter(lambda cfg: cfg.get("enabled", False), PROXY_CONFIGS))
|
|
def get_primary_proxy():
    """Return the first enabled proxy configuration, or ``None`` if there is none."""
    # next() with a default avoids indexing into a possibly empty list.
    return next(iter(get_active_proxies()), None)
|
|
def validate_config():
    """Check the module-level settings and report every problem found.

    Returns:
        A list of human-readable error messages, in check order; an empty
        list means every check passed.
    """
    # Each pair is (check failed?, message to report).  All conditions are
    # evaluated up front, in this order.
    # NOTE(review): proxies are only configured when PROXY_ENABLED is "true",
    # so a default environment always reports the first error -- confirm that
    # a proxy is really meant to be mandatory.
    checks = (
        (not get_active_proxies(), "No active proxy configurations found"),
        (
            RATE_LIMITING["min_delay"] >= RATE_LIMITING["max_delay"],
            "min_delay must be less than max_delay",
        ),
        (not SESSION_TOKENS, "No session tokens configured"),
    )
    return [message for failed, message in checks if failed]
|
|
if __name__ == "__main__":
    # Running the module directly performs a self-check and prints the outcome.
    errors = validate_config()
    if not errors:
        # Summary counts are shown only for a valid configuration.
        print("✅ Configuration is valid")
        print(f"Active proxies: {len(get_active_proxies())}")
        print(f"Session tokens: {len(SESSION_TOKENS)}")
        print(f"User agents: {len(USER_AGENTS)}")
    else:
        print("❌ Configuration errors found:")
        for error in errors:
            print(f" - {error}")
|
|