mhdzumair commited on
Commit
29348f5
·
1 Parent(s): 3057a00

Refactor extractors and routes for improved structure

Browse files

Reorganized extractor routes into dedicated modules for better maintainability. Introduced a base class for extractors, enabling consistent request handling across different services. Additionally, updated configurations and error handling in extractors, enhancing code readability and robustness.

mediaflow_proxy/configs.py CHANGED
@@ -6,6 +6,10 @@ class Settings(BaseSettings):
6
  proxy_url: str | None = None # The URL of the proxy server to route requests through.
7
  enable_streaming_progress: bool = False # Whether to enable streaming progress tracking.
8
 
 
 
 
 
9
  class Config:
10
  env_file = ".env"
11
  extra = "ignore"
 
6
  proxy_url: str | None = None # The URL of the proxy server to route requests through.
7
  enable_streaming_progress: bool = False # Whether to enable streaming progress tracking.
8
 
9
+ user_agent: str = (
10
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" # The user agent to use for HTTP requests.
11
+ )
12
+
13
  class Config:
14
  env_file = ".env"
15
  extra = "ignore"
mediaflow_proxy/extractors/__init__.py ADDED
File without changes
mediaflow_proxy/extractors/base.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Tuple, Optional
3
+
4
+ import httpx
5
+
6
+ from mediaflow_proxy.configs import settings
7
+
8
+
9
class BaseExtractor(ABC):
    """Base class for all URL extractors.

    Subclasses implement `extract` to resolve a hosting-service page URL
    into a direct stream URL plus the HTTP headers required to fetch it.
    """

    def __init__(self, proxy_enabled: bool = False):
        # Route requests through the configured proxy only when requested;
        # a None proxy_url means a direct connection.
        self.proxy_url = settings.proxy_url if proxy_enabled else None
        self.base_headers = {
            "User-Agent": settings.user_agent,
            "Accept-Language": "en-US,en;q=0.5",
        }

    async def _make_request(
        self, url: str, headers: Optional[Dict] = None, follow_redirects: bool = True, **kwargs
    ) -> httpx.Response:
        """Make an HTTP GET request with error handling.

        Args:
            url: The URL to request.
            headers: Extra headers merged over `base_headers` (extra wins on conflict).
            follow_redirects: Whether HTTP redirects are followed automatically.
            **kwargs: Passed through to `httpx.AsyncClient.get`.

        Returns:
            The successful `httpx.Response`.

        Raises:
            ValueError: If the request fails or returns a non-2xx status.
        """
        try:
            async with httpx.AsyncClient(proxy=self.proxy_url) as client:
                response = await client.get(
                    url,
                    headers={**self.base_headers, **(headers or {})},
                    follow_redirects=follow_redirects,
                    timeout=30,
                    **kwargs,
                )
                response.raise_for_status()
                return response
        except httpx.HTTPError as e:
            # Chain the original exception so the root cause stays visible in tracebacks.
            raise ValueError(f"HTTP request failed: {str(e)}") from e
        except Exception as e:
            raise ValueError(f"Request failed: {str(e)}") from e

    @abstractmethod
    async def extract(self, url: str) -> Tuple[str, Dict[str, str]]:
        """Extract final URL and required headers."""
        pass
mediaflow_proxy/extractors/doodstream.py CHANGED
@@ -1,25 +1,34 @@
1
- import httpx
2
- import time
3
  import re
4
- from mediaflow_proxy.configs import settings
5
-
6
-
7
- async def doodstream_url(d: str, use_request_proxy: bool):
8
- async with httpx.AsyncClient(proxy=settings.proxy_url if use_request_proxy else None) as client:
9
- headers = {
10
- "Range": "bytes=0-",
11
- "Referer": "https://d000d.com/",
12
- }
13
-
14
- response = await client.get(d, follow_redirects=True)
15
- if response.status_code == 200:
16
- # Get unique timestamp for the request
17
- real_time = str(int(time.time()))
18
- pattern = r"(\/pass_md5\/.*?)'.*(\?token=.*?expiry=)"
19
- match = re.search(pattern, response.text, re.DOTALL)
20
- if match:
21
- url = f"https://d000d.com{match[1]}"
22
- rebobo = await client.get(url, headers=headers, follow_redirects=True)
23
- final_url = f"{rebobo.text}123456789{match[2]}{real_time}"
24
- doodstream_dict = {"Referer": "https://d000d.com/"}
25
- return final_url, doodstream_dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import re
2
+ import time
3
+ from typing import Tuple, Dict
4
+
5
+ from mediaflow_proxy.extractors.base import BaseExtractor
6
+
7
+
8
class DoodStreamExtractor(BaseExtractor):
    """DoodStream URL extractor."""

    def __init__(self, proxy_enabled: bool = False):
        super().__init__(proxy_enabled)
        self.base_url = "https://d000d.com"

    async def extract(self, url: str) -> Tuple[str, Dict[str, str]]:
        """Extract DoodStream URL."""
        page = await self._make_request(url)

        # Locate the pass_md5 path and the token query fragment in the page source.
        token_match = re.search(r"(\/pass_md5\/.*?)'.*(\?token=.*?expiry=)", page.text, re.DOTALL)
        if token_match is None:
            raise ValueError("Failed to extract URL pattern")

        referer = f"{self.base_url}/"
        request_headers = {"Range": "bytes=0-", "Referer": referer}

        # The pass_md5 endpoint returns the stream URL prefix in its body.
        md5_response = await self._make_request(f"{self.base_url}{token_match[1]}", headers=request_headers)

        # Final URL = body + fixed padding + token fragment + current unix timestamp.
        timestamp = str(int(time.time()))
        final_url = f"{md5_response.text}123456789{token_match[2]}{timestamp}"
        return final_url, {"Referer": referer}
mediaflow_proxy/extractors/factory.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Type
2
+
3
+ from mediaflow_proxy.extractors.base import BaseExtractor
4
+ from mediaflow_proxy.extractors.doodstream import DoodStreamExtractor
5
+ from mediaflow_proxy.extractors.mixdrop import MixdropExtractor
6
+ from mediaflow_proxy.extractors.uqload import UqloadExtractor
7
+
8
+
9
class ExtractorFactory:
    """Factory for creating URL extractors."""

    # Registry mapping host identifiers to their extractor implementations.
    _extractors: Dict[str, Type[BaseExtractor]] = {
        "Doodstream": DoodStreamExtractor,
        "Uqload": UqloadExtractor,
        "Mixdrop": MixdropExtractor,
    }

    @classmethod
    def get_extractor(cls, host: str, proxy_enabled: bool = False) -> BaseExtractor:
        """Get appropriate extractor instance for the given host."""
        try:
            extractor_cls = cls._extractors[host]
        except KeyError:
            raise ValueError(f"Unsupported host: {host}") from None
        return extractor_cls(proxy_enabled)
mediaflow_proxy/extractors/mixdrop.py CHANGED
@@ -1,27 +1,31 @@
1
- import httpx
2
  import re
3
  import string
4
- from mediaflow_proxy.configs import settings
5
 
 
6
 
7
- async def mixdrop_url(d: str, use_request_proxy: bool):
8
- headers = {
9
- "User-Agent": "Mozilla/5.0 (Windows NT 10.10; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
10
- "Accept-Language": "en-US,en;q=0.5",
11
- }
12
- async with httpx.AsyncClient(proxy=settings.proxy_url if use_request_proxy else None) as client:
13
- response = await client.get(d, headers=headers, follow_redirects=True, timeout=30)
14
- [s1, s2] = re.search(r"\}\('(.+)',.+,'(.+)'\.split", response.text).group(1, 2)
 
 
 
 
 
 
15
  schema = s1.split(";")[2][5:-1]
16
  terms = s2.split("|")
 
 
17
  charset = string.digits + string.ascii_letters
18
- d = dict()
19
- for i in range(len(terms)):
20
- d[charset[i]] = terms[i] or charset[i]
21
- final_url = "https:"
22
- for c in schema:
23
- final_url += d[c] if c in d else c
24
- headers_dict = {
25
- "User-Agent": "Mozilla/5.0 (Windows NT 10.10; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
26
- }
27
- return final_url, headers_dict
 
 
1
  import re
2
  import string
3
+ from typing import Dict, Tuple
4
 
5
+ from mediaflow_proxy.extractors.base import BaseExtractor
6
 
7
+
8
class MixdropExtractor(BaseExtractor):
    """Mixdrop URL extractor."""

    async def extract(self, url: str) -> Tuple[str, Dict[str, str]]:
        """Extract Mixdrop URL."""
        page = await self._make_request(url)

        # The page embeds a JS packer call: }('<schema source>',...,'<wordlist>'.split
        packed = re.search(r"\}\('(.+)',.+,'(.+)'\.split", page.text)
        if packed is None:
            raise ValueError("Failed to extract URL components")

        encoded = packed.group(1)
        wordlist = packed.group(2)
        schema = encoded.split(";")[2][5:-1]
        terms = wordlist.split("|")

        # Substitution alphabet used by the packer: digits first, then letters.
        charset = string.digits + string.ascii_letters
        mapping = {}
        for index in range(len(terms)):
            # An empty term means the character maps to itself.
            mapping[charset[index]] = terms[index] or charset[index]

        # Decode the schema character by character; unknown chars pass through.
        decoded_parts = ["https:"]
        for symbol in schema:
            decoded_parts.append(mapping.get(symbol, symbol))

        return "".join(decoded_parts), {"User-Agent": self.base_headers["User-Agent"]}
 
 
 
 
mediaflow_proxy/extractors/uqload.py CHANGED
@@ -1,14 +1,18 @@
1
- import httpx
2
  import re
3
- from mediaflow_proxy.configs import settings
4
 
 
5
 
6
- async def uqload_url(d: str, use_request_proxy: bool):
7
- async with httpx.AsyncClient(proxy=settings.proxy_url if use_request_proxy else None) as client:
8
 
9
- response = await client.get(d, follow_redirects=True)
 
 
 
 
 
 
10
  video_url_match = re.search(r'sources: \["(.*?)"\]', response.text)
11
- if video_url_match:
12
- final_url = video_url_match.group(1)
13
- uqload_dict = {"Referer": "https://uqload.to/"}
14
- return final_url, uqload_dict
 
 
1
  import re
2
+ from typing import Dict, Tuple
3
 
4
+ from mediaflow_proxy.extractors.base import BaseExtractor
5
 
 
 
6
 
7
class UqloadExtractor(BaseExtractor):
    """Uqload URL extractor."""

    async def extract(self, url: str) -> Tuple[str, Dict[str, str]]:
        """Extract Uqload URL."""
        page = await self._make_request(url)

        # The player config embeds the direct URL as: sources: ["<url>"]
        source_match = re.search(r'sources: \["(.*?)"\]', page.text)
        if source_match is None:
            raise ValueError("Failed to extract video URL")

        return source_match.group(1), {"Referer": "https://uqload.to/"}
mediaflow_proxy/extractors_routes.py DELETED
@@ -1,55 +0,0 @@
1
- from fastapi import APIRouter, Query
2
- from fastapi.responses import JSONResponse, RedirectResponse
3
- from .extractors.doodstream import doodstream_url
4
- from .extractors.uqload import uqload_url
5
- from .extractors.mixdrop import mixdrop_url
6
- from mediaflow_proxy.configs import settings
7
-
8
- extractor_router = APIRouter()
9
- host_map = {"Doodstream": doodstream_url, "Mixdrop": mixdrop_url, "Uqload": uqload_url}
10
-
11
-
12
- @extractor_router.get("/extractor")
13
- async def doodstream_extractor(
14
- d: str = Query(..., description="Extract Clean Link from various Hosts"),
15
- use_request_proxy: bool = Query(False, description="Whether to use the MediaFlow proxy configuration."),
16
- host: str = Query(
17
- ..., description='From which Host the URL comes from, here avaiable ones: "Doodstream","Mixdrop","Uqload"'
18
- ),
19
- redirect_stream: bool = Query(
20
- False,
21
- description="If enabled the response will be redirected to stream endpoint automatically and the stream will be proxied",
22
- ),
23
- ):
24
- """
25
- Extract a clean link from DoodStream,Mixdrop,Uqload
26
-
27
- Args: request (Request): The incoming HTTP request
28
-
29
- Returns: The clean link (url) and the headers needed to access the url
30
-
31
- N.B. You can't use a rotating proxy if type is set to "Doodstream"
32
- """
33
- try:
34
- final_url, headers_dict = await host_map[host](d, use_request_proxy)
35
- except Exception as e:
36
- return JSONResponse(content={"error": str(e)})
37
- if redirect_stream == True:
38
- formatted_headers = format_headers(headers_dict)
39
- redirected_stream = f"/proxy/stream?api_password={settings.api_password}&d={final_url}&{formatted_headers}"
40
- return RedirectResponse(url=redirected_stream)
41
- elif redirect_stream == False:
42
- return JSONResponse(content={"url": final_url, "headers": headers_dict})
43
-
44
-
45
- def format_headers(headers):
46
- """
47
- Format the headers dictionary into a query string format with 'h_' prefix.
48
-
49
- Args:
50
- - headers: A dictionary of headers.
51
-
52
- Returns:
53
- - A query string formatted string of headers.
54
- """
55
- return "&".join(f"h_{key}={value}" for key, value in headers.items())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
mediaflow_proxy/main.py CHANGED
@@ -1,6 +1,6 @@
1
  import logging
2
- from importlib import resources
3
  import uuid
 
4
 
5
  from fastapi import FastAPI, Depends, Security, HTTPException, BackgroundTasks
6
  from fastapi.security import APIKeyQuery, APIKeyHeader
@@ -9,12 +9,11 @@ from starlette.responses import RedirectResponse, JSONResponse
9
  from starlette.staticfiles import StaticFiles
10
 
11
  from mediaflow_proxy.configs import settings
12
- from mediaflow_proxy.routes import proxy_router
13
- from mediaflow_proxy.extractors_routes import extractor_router
14
  from mediaflow_proxy.schemas import GenerateUrlRequest
15
  from mediaflow_proxy.utils.crypto_utils import EncryptionHandler, EncryptionMiddleware
16
- from mediaflow_proxy.utils.rd_speedtest import run_speedtest, prune_task, results
17
  from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url
 
18
 
19
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
20
  app = FastAPI()
@@ -57,7 +56,7 @@ async def trigger_speedtest(background_tasks: BackgroundTasks, api_password: str
57
  # Generate a random UUID as task_id
58
  task_id = str(uuid.uuid4()) # Generate unique task ID
59
  background_tasks.add_task(run_speedtest, task_id)
60
-
61
  # Schedule the task to be pruned after 1 hour
62
  background_tasks.add_task(prune_task, task_id)
63
 
@@ -97,8 +96,7 @@ async def generate_encrypted_or_encoded_url(request: GenerateUrlRequest):
97
 
98
 
99
  app.include_router(proxy_router, prefix="/proxy", tags=["proxy"], dependencies=[Depends(verify_api_key)])
100
- app.include_router(extractor_router, tags=["extractors"], dependencies=[Depends(verify_api_key)])
101
-
102
 
103
  static_path = resources.files("mediaflow_proxy").joinpath("static")
104
  app.mount("/", StaticFiles(directory=str(static_path), html=True), name="static")
@@ -111,4 +109,4 @@ def run():
111
 
112
 
113
  if __name__ == "__main__":
114
- run()
 
1
  import logging
 
2
  import uuid
3
+ from importlib import resources
4
 
5
  from fastapi import FastAPI, Depends, Security, HTTPException, BackgroundTasks
6
  from fastapi.security import APIKeyQuery, APIKeyHeader
 
9
  from starlette.staticfiles import StaticFiles
10
 
11
  from mediaflow_proxy.configs import settings
12
+ from mediaflow_proxy.routes import proxy_router, extractor_router
 
13
  from mediaflow_proxy.schemas import GenerateUrlRequest
14
  from mediaflow_proxy.utils.crypto_utils import EncryptionHandler, EncryptionMiddleware
 
15
  from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url
16
+ from mediaflow_proxy.utils.rd_speedtest import run_speedtest, prune_task, results
17
 
18
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
19
  app = FastAPI()
 
56
  # Generate a random UUID as task_id
57
  task_id = str(uuid.uuid4()) # Generate unique task ID
58
  background_tasks.add_task(run_speedtest, task_id)
59
+
60
  # Schedule the task to be pruned after 1 hour
61
  background_tasks.add_task(prune_task, task_id)
62
 
 
96
 
97
 
98
  app.include_router(proxy_router, prefix="/proxy", tags=["proxy"], dependencies=[Depends(verify_api_key)])
99
+ app.include_router(extractor_router, prefix="/extractor", tags=["extractors"], dependencies=[Depends(verify_api_key)])
 
100
 
101
  static_path = resources.files("mediaflow_proxy").joinpath("static")
102
  app.mount("/", StaticFiles(directory=str(static_path), html=True), name="static")
 
109
 
110
 
111
  if __name__ == "__main__":
112
+ run()
mediaflow_proxy/routes/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .proxy import proxy_router
2
+ from .extractor import extractor_router
mediaflow_proxy/routes/extractor.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Annotated
2
+
3
+ from fastapi import APIRouter, Query, HTTPException
4
+ from fastapi.responses import RedirectResponse
5
+
6
+ from mediaflow_proxy.configs import settings
7
+ from mediaflow_proxy.extractors.factory import ExtractorFactory
8
+ from mediaflow_proxy.schemas import ExtractorURLParams
9
+
10
+ extractor_router = APIRouter()
11
+
12
+
13
@extractor_router.get("/video")
async def extract_url(
    extractor_params: Annotated[ExtractorURLParams, Query()],
):
    """Extract clean links from various video hosting services.

    Resolves the destination URL with the host-specific extractor and either
    returns the final URL plus required headers as JSON, or (when
    `redirect_stream` is set) redirects to the /proxy/stream endpoint with
    the headers encoded as `h_`-prefixed query parameters.

    Raises:
        HTTPException: 400 for unsupported hosts or extraction failures,
            500 for unexpected errors.
    """
    from urllib.parse import urlencode  # stdlib; local import keeps module imports unchanged

    try:
        extractor = ExtractorFactory.get_extractor(extractor_params.host, extractor_params.use_request_proxy)
        final_url, headers = await extractor.extract(extractor_params.destination)

        if extractor_params.redirect_stream:
            # URL-encode every value: the extracted URL and header values may
            # contain '&', '?', '=' or spaces that would corrupt a query
            # string built by raw interpolation.
            query = urlencode(
                {
                    "api_password": settings.api_password,
                    "d": final_url,
                    **{f"h_{key}": value for key, value in headers.items()},
                }
            )
            return RedirectResponse(url=f"/proxy/stream?{query}")

        return {"url": final_url, "headers": headers}

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Extraction failed: {str(e)}") from e
mediaflow_proxy/{routes.py → routes/proxy.py} RENAMED
@@ -2,9 +2,22 @@ from typing import Annotated
2
 
3
  from fastapi import Request, Depends, APIRouter, Query, HTTPException
4
 
5
- from .handlers import handle_hls_stream_proxy, proxy_stream, get_manifest, get_playlist, get_segment, get_public_ip
6
- from .schemas import MPDSegmentParams, MPDPlaylistParams, HLSManifestParams, ProxyStreamParams, MPDManifestParams
7
- from .utils.http_utils import get_proxy_headers, ProxyRequestHeaders
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  proxy_router = APIRouter()
10
 
 
2
 
3
  from fastapi import Request, Depends, APIRouter, Query, HTTPException
4
 
5
+ from mediaflow_proxy.handlers import (
6
+ handle_hls_stream_proxy,
7
+ proxy_stream,
8
+ get_manifest,
9
+ get_playlist,
10
+ get_segment,
11
+ get_public_ip,
12
+ )
13
+ from mediaflow_proxy.schemas import (
14
+ MPDSegmentParams,
15
+ MPDPlaylistParams,
16
+ HLSManifestParams,
17
+ ProxyStreamParams,
18
+ MPDManifestParams,
19
+ )
20
+ from mediaflow_proxy.utils.http_utils import get_proxy_headers, ProxyRequestHeaders
21
 
22
  proxy_router = APIRouter()
23
 
mediaflow_proxy/schemas.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  from pydantic import BaseModel, Field, IPvAnyAddress, ConfigDict
2
 
3
 
@@ -55,3 +57,9 @@ class MPDSegmentParams(GenericParams):
55
  mime_type: str = Field(..., description="The MIME type of the segment.")
56
  key_id: str | None = Field(None, description="The DRM key ID (optional).")
57
  key: str | None = Field(None, description="The DRM key (optional).")
 
 
 
 
 
 
 
1
+ from typing import Literal
2
+
3
  from pydantic import BaseModel, Field, IPvAnyAddress, ConfigDict
4
 
5
 
 
57
  mime_type: str = Field(..., description="The MIME type of the segment.")
58
  key_id: str | None = Field(None, description="The DRM key ID (optional).")
59
  key: str | None = Field(None, description="The DRM key (optional).")
60
+
61
+
62
class ExtractorURLParams(GenericParams):
    """Query parameters accepted by the extractor video endpoint."""

    host: Literal["Doodstream", "Mixdrop", "Uqload"] = Field(..., description="The host to extract the URL from.")
    destination: str = Field(..., description="The URL of the stream.", alias="d")
    redirect_stream: bool = Field(False, description="Whether to redirect to the stream endpoint automatically.")
mediaflow_proxy/utils/http_utils.py CHANGED
@@ -125,6 +125,10 @@ class Streamer:
125
  else:
126
  async for chunk in self.response.aiter_bytes():
127
  yield chunk
 
 
 
 
128
  except GeneratorExit:
129
  logger.info("Streaming session stopped by the user")
130
  except Exception as e:
 
125
  else:
126
  async for chunk in self.response.aiter_bytes():
127
  yield chunk
128
+ self.bytes_transferred += len(chunk)
129
+ except httpx.TimeoutException:
130
+ logger.warning(f"Timeout while streaming {url}")
131
+ raise DownloadError(409, f"Timeout while streaming {url}")
132
  except GeneratorExit:
133
  logger.info("Streaming session stopped by the user")
134
  except Exception as e: