mhdzumair's picture
Update headers to consistent lowercase format to fix duplicate headers
8584387
from abc import ABC, abstractmethod
from typing import Dict, Tuple, Optional
import httpx
from mediaflow_proxy.configs import settings
class BaseExtractor(ABC):
    """Base class for all URL extractors.

    Holds the optional proxy URL and a set of default request headers, and
    provides ``_make_request`` as a shared GET helper for subclasses.
    Subclasses must implement ``extract``.
    """

    def __init__(self, proxy_enabled: bool, request_headers: dict):
        """Initialise proxy settings and default headers.

        Args:
            proxy_enabled: When true, requests go through ``settings.proxy_url``;
                otherwise no proxy is used.
            request_headers: Headers merged over the built-in defaults. Keys
                that match a default (e.g. ``"user-agent"``) override it.
                ``None`` is treated as "no extra headers".
        """
        self.proxy_url = settings.proxy_url if proxy_enabled else None
        self.base_headers = {
            "user-agent": settings.user_agent,
            "accept-language": "en-US,en;q=0.5",
        }
        # Tolerate None so a caller without extra headers does not crash.
        self.base_headers.update(request_headers or {})

    async def _make_request(
        self, url: str, headers: Optional[Dict] = None, follow_redirects: bool = True, **kwargs
    ) -> httpx.Response:
        """Make an HTTP GET request with error handling.

        Args:
            url: Target URL.
            headers: Extra headers for this request only; they override
                ``self.base_headers`` on key collision.
            follow_redirects: Forwarded to ``httpx.AsyncClient.get``.
            **kwargs: Additional keyword arguments forwarded to
                ``httpx.AsyncClient.get``.

        Returns:
            The response, after ``raise_for_status()`` has succeeded.

        Raises:
            ValueError: If the request fails for any reason (HTTP error
                status, transport error, or any other exception). The
                original exception is attached as ``__cause__``.
        """
        # Build a fresh dict per call: updating self.base_headers in place
        # would leak per-request headers into every later request made by
        # this extractor instance.
        request_headers = {**self.base_headers, **(headers or {})}
        try:
            async with httpx.AsyncClient(proxy=self.proxy_url) as client:
                response = await client.get(
                    url,
                    headers=request_headers,
                    follow_redirects=follow_redirects,
                    timeout=30,
                    **kwargs,
                )
                response.raise_for_status()
                return response
        except httpx.HTTPError as e:
            raise ValueError(f"HTTP request failed: {e}") from e
        except Exception as e:
            # Deliberately broad: callers only need to handle ValueError.
            raise ValueError(f"Request failed: {e}") from e

    @abstractmethod
    async def extract(self, url: str) -> Tuple[str, Dict[str, str]]:
        """Extract final URL and required headers."""
        pass