mhdzumair commited on
Commit
29348f5
·
1 Parent(s): 3057a00

Refactor extractors and routes for improved structure

Browse files

Reorganized extractor routes into dedicated modules for better maintainability. Introduced a base class for extractors, enabling consistent request handling across different services. Additionally, updated configurations and error handling in extractors, enhancing code readability and robustness.

mediaflow_proxy/configs.py CHANGED
@@ -6,6 +6,10 @@ class Settings(BaseSettings):
6
  proxy_url: str | None = None # The URL of the proxy server to route requests through.
7
  enable_streaming_progress: bool = False # Whether to enable streaming progress tracking.
8
 
 
 
 
 
9
  class Config:
10
  env_file = ".env"
11
  extra = "ignore"
 
6
  proxy_url: str | None = None # The URL of the proxy server to route requests through.
7
  enable_streaming_progress: bool = False # Whether to enable streaming progress tracking.
8
 
9
+ user_agent: str = (
10
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" # The user agent to use for HTTP requests.
11
+ )
12
+
13
  class Config:
14
  env_file = ".env"
15
  extra = "ignore"
mediaflow_proxy/extractors/__init__.py ADDED
File without changes
mediaflow_proxy/extractors/base.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Tuple, Optional
3
+
4
+ import httpx
5
+
6
+ from mediaflow_proxy.configs import settings
7
+
8
+
9
class BaseExtractor(ABC):
    """Base class for all URL extractors.

    Subclasses implement `extract` to resolve a hosting-service page URL
    into a direct stream URL plus the HTTP headers required to fetch it.
    """

    def __init__(self, proxy_enabled: bool = False):
        # Route requests through the configured proxy only when requested;
        # a None proxy_url means a direct connection.
        self.proxy_url = settings.proxy_url if proxy_enabled else None
        self.base_headers = {
            "User-Agent": settings.user_agent,
            "Accept-Language": "en-US,en;q=0.5",
        }

    async def _make_request(
        self, url: str, headers: Optional[Dict] = None, follow_redirects: bool = True, **kwargs
    ) -> httpx.Response:
        """Make an HTTP GET request with error handling.

        Args:
            url: The URL to request.
            headers: Extra headers merged over `base_headers` (extra wins on conflict).
            follow_redirects: Whether HTTP redirects are followed automatically.
            **kwargs: Passed through to `httpx.AsyncClient.get`.

        Returns:
            The successful `httpx.Response`.

        Raises:
            ValueError: If the request fails or returns a non-2xx status.
        """
        try:
            async with httpx.AsyncClient(proxy=self.proxy_url) as client:
                response = await client.get(
                    url,
                    headers={**self.base_headers, **(headers or {})},
                    follow_redirects=follow_redirects,
                    timeout=30,
                    **kwargs,
                )
                response.raise_for_status()
                return response
        except httpx.HTTPError as e:
            # Chain the original exception so the root cause stays visible in tracebacks.
            raise ValueError(f"HTTP request failed: {str(e)}") from e
        except Exception as e:
            raise ValueError(f"Request failed: {str(e)}") from e

    @abstractmethod
    async def extract(self, url: str) -> Tuple[str, Dict[str, str]]:
        """Extract final URL and required headers."""
        pass
mediaflow_proxy/extractors/doodstream.py CHANGED
@@ -1,25 +1,34 @@
1
- import httpx
2
- import time
3
  import re
4
- from mediaflow_proxy.configs import settings
5
-
6
-
7
- async def doodstream_url(d: str, use_request_proxy: bool):
8
- async with httpx.AsyncClient(proxy=settings.proxy_url if use_request_proxy else None) as client:
9
- headers = {
10
- "Range": "bytes=0-",
11
- "Referer": "https://d000d.com/",
12
- }
13
-
14
- response = await client.get(d, follow_redirects=True)
15
- if response.status_code == 200:
16
- # Get unique timestamp for the request
17
- real_time = str(int(time.time()))
18
- pattern = r"(\/pass_md5\/.*?)'.*(\?token=.*?expiry=)"
19
- match = re.search(pattern, response.text, re.DOTALL)
20
- if match:
21
- url = f"https://d000d.com{match[1]}"
22
- rebobo = await client.get(url, headers=headers, follow_redirects=True)
23
- final_url = f"{rebobo.text}123456789{match[2]}{real_time}"
24
- doodstream_dict = {"Referer": "https://d000d.com/"}
25
- return final_url, doodstream_dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import re
2
+ import time
3
+ from typing import Tuple, Dict
4
+
5
+ from mediaflow_proxy.extractors.base import BaseExtractor
6
+
7
+
8
class DoodStreamExtractor(BaseExtractor):
    """DoodStream URL extractor."""

    def __init__(self, proxy_enabled: bool = False):
        super().__init__(proxy_enabled)
        self.base_url = "https://d000d.com"

    async def extract(self, url: str) -> Tuple[str, Dict[str, str]]:
        """Extract DoodStream URL."""
        page = await self._make_request(url)

        # Locate the pass_md5 path and the token query fragment in the page source.
        token_match = re.search(r"(\/pass_md5\/.*?)'.*(\?token=.*?expiry=)", page.text, re.DOTALL)
        if token_match is None:
            raise ValueError("Failed to extract URL pattern")

        referer = f"{self.base_url}/"
        request_headers = {"Range": "bytes=0-", "Referer": referer}

        # The pass_md5 endpoint returns the stream URL prefix in its body.
        md5_response = await self._make_request(f"{self.base_url}{token_match[1]}", headers=request_headers)

        # Final URL = body + fixed padding + token fragment + current unix timestamp.
        timestamp = str(int(time.time()))
        final_url = f"{md5_response.text}123456789{token_match[2]}{timestamp}"
        return final_url, {"Referer": referer}
mediaflow_proxy/extractors/factory.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Type
2
+
3
+ from mediaflow_proxy.extractors.base import BaseExtractor
4
+ from mediaflow_proxy.extractors.doodstream import DoodStreamExtractor
5
+ from mediaflow_proxy.extractors.mixdrop import MixdropExtractor
6
+ from mediaflow_proxy.extractors.uqload import UqloadExtractor
7
+
8
+
9
class ExtractorFactory:
    """Factory for creating URL extractors."""

    # Registry mapping host identifiers to their extractor implementations.
    _extractors: Dict[str, Type[BaseExtractor]] = {
        "Doodstream": DoodStreamExtractor,
        "Uqload": UqloadExtractor,
        "Mixdrop": MixdropExtractor,
    }

    @classmethod
    def get_extractor(cls, host: str, proxy_enabled: bool = False) -> BaseExtractor:
        """Get appropriate extractor instance for the given host."""
        try:
            extractor_cls = cls._extractors[host]
        except KeyError:
            raise ValueError(f"Unsupported host: {host}") from None
        return extractor_cls(proxy_enabled)
mediaflow_proxy/extractors/mixdrop.py CHANGED
@@ -1,27 +1,31 @@
1
- import httpx
2
  import re
3
  import string
4
- from mediaflow_proxy.configs import settings
5
 
 
6
 
7
- async def mixdrop_url(d: str, use_request_proxy: bool):
8
- headers = {
9
- "User-Agent": "Mozilla/5.0 (Windows NT 10.10; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
10
- "Accept-Language": "en-US,en;q=0.5",
11
- }
12
- async with httpx.AsyncClient(proxy=settings.proxy_url if use_request_proxy else None) as client:
13
- response = await client.get(d, headers=headers, follow_redirects=True, timeout=30)
14
- [s1, s2] = re.search(r"\}\('(.+)',.+,'(.+)'\.split", response.text).group(1, 2)
 
 
 
 
 
 
15
  schema = s1.split(";")[2][5:-1]
16
  terms = s2.split("|")
 
 
17
  charset = string.digits + string.ascii_letters
18
- d = dict()
19
- for i in range(len(terms)):
20
- d[charset[i]] = terms[i] or charset[i]
21
- final_url = "https:"
22
- for c in schema:
23
- final_url += d[c] if c in d else c
24
- headers_dict = {
25
- "User-Agent": "Mozilla/5.0 (Windows NT 10.10; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
26
- }
27
- return final_url, headers_dict
 
 
1
  import re
2
  import string
3
+ from typing import Dict, Tuple
4
 
5
+ from mediaflow_proxy.extractors.base import BaseExtractor
6
 
7
+
8
class MixdropExtractor(BaseExtractor):
    """Mixdrop URL extractor."""

    async def extract(self, url: str) -> Tuple[str, Dict[str, str]]:
        """Extract Mixdrop URL."""
        page = await self._make_request(url)

        # The page embeds a JS packer call: }('<schema source>',...,'<wordlist>'.split
        packed = re.search(r"\}\('(.+)',.+,'(.+)'\.split", page.text)
        if packed is None:
            raise ValueError("Failed to extract URL components")

        encoded = packed.group(1)
        wordlist = packed.group(2)
        schema = encoded.split(";")[2][5:-1]
        terms = wordlist.split("|")

        # Substitution alphabet used by the packer: digits first, then letters.
        charset = string.digits + string.ascii_letters
        mapping = {}
        for index in range(len(terms)):
            # An empty term means the character maps to itself.
            mapping[charset[index]] = terms[index] or charset[index]

        # Decode the schema character by character; unknown chars pass through.
        decoded_parts = ["https:"]
        for symbol in schema:
            decoded_parts.append(mapping.get(symbol, symbol))

        return "".join(decoded_parts), {"User-Agent": self.base_headers["User-Agent"]}
 
 
 
 
mediaflow_proxy/extractors/uqload.py CHANGED
@@ -1,14 +1,18 @@
1
- import httpx
2
  import re
3
- from mediaflow_proxy.configs import settings
4
 
 
5
 
6
- async def uqload_url(d: str, use_request_proxy: bool):
7
- async with httpx.AsyncClient(proxy=settings.proxy_url if use_request_proxy else None) as client:
8
 
9
- response = await client.get(d, follow_redirects=True)
 
 
 
 
 
 
10
  video_url_match = re.search(r'sources: \["(.*?)"\]', response.text)
11
- if video_url_match:
12
- final_url = video_url_match.group(1)
13
- uqload_dict = {"Referer": "https://uqload.to/"}
14
- return final_url, uqload_dict
 
 
1
  import re
2
+ from typing import Dict, Tuple
3
 
4
+ from mediaflow_proxy.extractors.base import BaseExtractor
5
 
 
 
6
 
7
class UqloadExtractor(BaseExtractor):
    """Uqload URL extractor."""

    async def extract(self, url: str) -> Tuple[str, Dict[str, str]]:
        """Extract Uqload URL."""
        page = await self._make_request(url)

        # The player config embeds the direct URL as: sources: ["<url>"]
        source_match = re.search(r'sources: \["(.*?)"\]', page.text)
        if source_match is None:
            raise ValueError("Failed to extract video URL")

        return source_match.group(1), {"Referer": "https://uqload.to/"}
mediaflow_proxy/extractors_routes.py DELETED
@@ -1,55 +0,0 @@
1
- from fastapi import APIRouter, Query
2
- from fastapi.responses import JSONResponse, RedirectResponse
3
- from .extractors.doodstream import doodstream_url
4
- from .extractors.uqload import uqload_url
5
- from .extractors.mixdrop import mixdrop_url
6
- from mediaflow_proxy.configs import settings
7
-
8
- extractor_router = APIRouter()
9
- host_map = {"Doodstream": doodstream_url, "Mixdrop": mixdrop_url, "Uqload": uqload_url}
10
-
11
-
12
- @extractor_router.get("/extractor")
13
- async def doodstream_extractor(
14
- d: str = Query(..., description="Extract Clean Link from various Hosts"),
15
- use_request_proxy: bool = Query(False, description="Whether to use the MediaFlow proxy configuration."),
16
- host: str = Query(
17
- ..., description='From which Host the URL comes from, here avaiable ones: "Doodstream","Mixdrop","Uqload"'
18
- ),
19
- redirect_stream: bool = Query(
20
- False,
21
- description="If enabled the response will be redirected to stream endpoint automatically and the stream will be proxied",
22
- ),
23
- ):
24
- """
25
- Extract a clean link from DoodStream,Mixdrop,Uqload
26
-
27
- Args: request (Request): The incoming HTTP request
28
-
29
- Returns: The clean link (url) and the headers needed to access the url
30
-
31
- N.B. You can't use a rotating proxy if type is set to "Doodstream"
32
- """
33
- try:
34
- final_url, headers_dict = await host_map[host](d, use_request_proxy)
35
- except Exception as e:
36
- return JSONResponse(content={"error": str(e)})
37
- if redirect_stream == True:
38
- formatted_headers = format_headers(headers_dict)
39
- redirected_stream = f"/proxy/stream?api_password={settings.api_password}&d={final_url}&{formatted_headers}"
40
- return RedirectResponse(url=redirected_stream)
41
- elif redirect_stream == False:
42
- return JSONResponse(content={"url": final_url, "headers": headers_dict})
43
-
44
-
45
- def format_headers(headers):
46
- """
47
- Format the headers dictionary into a query string format with 'h_' prefix.
48
-
49
- Args:
50
- - headers: A dictionary of headers.
51
-
52
- Returns:
53
- - A query string formatted string of headers.
54
- """
55
- return "&".join(f"h_{key}={value}" for key, value in headers.items())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
mediaflow_proxy/main.py CHANGED
@@ -1,6 +1,6 @@
1
  import logging
2
- from importlib import resources
3
  import uuid
 
4
 
5
  from fastapi import FastAPI, Depends, Security, HTTPException, BackgroundTasks
6
  from fastapi.security import APIKeyQuery, APIKeyHeader
@@ -9,12 +9,11 @@ from starlette.responses import RedirectResponse, JSONResponse
9
  from starlette.staticfiles import StaticFiles
10
 
11
  from mediaflow_proxy.configs import settings
12
- from mediaflow_proxy.routes import proxy_router
13
- from mediaflow_proxy.extractors_routes import extractor_router
14
  from mediaflow_proxy.schemas import GenerateUrlRequest
15
  from mediaflow_proxy.utils.crypto_utils import EncryptionHandler, EncryptionMiddleware
16
- from mediaflow_proxy.utils.rd_speedtest import run_speedtest, prune_task, results
17
  from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url
 
18
 
19
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
20
  app = FastAPI()
@@ -57,7 +56,7 @@ async def trigger_speedtest(background_tasks: BackgroundTasks, api_password: str
57
  # Generate a random UUID as task_id
58
  task_id = str(uuid.uuid4()) # Generate unique task ID
59
  background_tasks.add_task(run_speedtest, task_id)
60
-
61
  # Schedule the task to be pruned after 1 hour
62
  background_tasks.add_task(prune_task, task_id)
63
 
@@ -97,8 +96,7 @@ async def generate_encrypted_or_encoded_url(request: GenerateUrlRequest):
97
 
98
 
99
  app.include_router(proxy_router, prefix="/proxy", tags=["proxy"], dependencies=[Depends(verify_api_key)])
100
- app.include_router(extractor_router, tags=["extractors"], dependencies=[Depends(verify_api_key)])
101
-
102
 
103
  static_path = resources.files("mediaflow_proxy").joinpath("static")
104
  app.mount("/", StaticFiles(directory=str(static_path), html=True), name="static")
@@ -111,4 +109,4 @@ def run():
111
 
112
 
113
  if __name__ == "__main__":
114
- run()
 
1
  import logging
 
2
  import uuid
3
+ from importlib import resources
4
 
5
  from fastapi import FastAPI, Depends, Security, HTTPException, BackgroundTasks
6
  from fastapi.security import APIKeyQuery, APIKeyHeader
 
9
  from starlette.staticfiles import StaticFiles
10
 
11
  from mediaflow_proxy.configs import settings
12
+ from mediaflow_proxy.routes import proxy_router, extractor_router
 
13
  from mediaflow_proxy.schemas import GenerateUrlRequest
14
  from mediaflow_proxy.utils.crypto_utils import EncryptionHandler, EncryptionMiddleware
 
15
  from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url
16
+ from mediaflow_proxy.utils.rd_speedtest import run_speedtest, prune_task, results
17
 
18
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
19
  app = FastAPI()
 
56
  # Generate a random UUID as task_id
57
  task_id = str(uuid.uuid4()) # Generate unique task ID
58
  background_tasks.add_task(run_speedtest, task_id)
59
+
60
  # Schedule the task to be pruned after 1 hour
61
  background_tasks.add_task(prune_task, task_id)
62
 
 
96
 
97
 
98
  app.include_router(proxy_router, prefix="/proxy", tags=["proxy"], dependencies=[Depends(verify_api_key)])
99
+ app.include_router(extractor_router, prefix="/extractor", tags=["extractors"], dependencies=[Depends(verify_api_key)])
 
100
 
101
  static_path = resources.files("mediaflow_proxy").joinpath("static")
102
  app.mount("/", StaticFiles(directory=str(static_path), html=True), name="static")
 
109
 
110
 
111
  if __name__ == "__main__":
112
+ run()
mediaflow_proxy/routes/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .proxy import proxy_router
2
+ from .extractor import extractor_router
mediaflow_proxy/routes/extractor.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Annotated
2
+
3
+ from fastapi import APIRouter, Query, HTTPException
4
+ from fastapi.responses import RedirectResponse
5
+
6
+ from mediaflow_proxy.configs import settings
7
+ from mediaflow_proxy.extractors.factory import ExtractorFactory
8
+ from mediaflow_proxy.schemas import ExtractorURLParams
9
+
10
+ extractor_router = APIRouter()
11
+
12
+
13
@extractor_router.get("/video")
async def extract_url(
    extractor_params: Annotated[ExtractorURLParams, Query()],
):
    """Extract clean links from various video hosting services.

    Resolves the destination URL with the host-specific extractor and either
    returns the final URL plus required headers as JSON, or (when
    `redirect_stream` is set) redirects to the /proxy/stream endpoint with
    the headers encoded as `h_`-prefixed query parameters.

    Raises:
        HTTPException: 400 for unsupported hosts or extraction failures,
            500 for unexpected errors.
    """
    from urllib.parse import urlencode  # stdlib; local import keeps module imports unchanged

    try:
        extractor = ExtractorFactory.get_extractor(extractor_params.host, extractor_params.use_request_proxy)
        final_url, headers = await extractor.extract(extractor_params.destination)

        if extractor_params.redirect_stream:
            # URL-encode every value: the extracted URL and header values may
            # contain '&', '?', '=' or spaces that would corrupt a query
            # string built by raw interpolation.
            query = urlencode(
                {
                    "api_password": settings.api_password,
                    "d": final_url,
                    **{f"h_{key}": value for key, value in headers.items()},
                }
            )
            return RedirectResponse(url=f"/proxy/stream?{query}")

        return {"url": final_url, "headers": headers}

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Extraction failed: {str(e)}") from e
mediaflow_proxy/{routes.py → routes/proxy.py} RENAMED
@@ -2,9 +2,22 @@ from typing import Annotated
2
 
3
  from fastapi import Request, Depends, APIRouter, Query, HTTPException
4
 
5
- from .handlers import handle_hls_stream_proxy, proxy_stream, get_manifest, get_playlist, get_segment, get_public_ip
6
- from .schemas import MPDSegmentParams, MPDPlaylistParams, HLSManifestParams, ProxyStreamParams, MPDManifestParams
7
- from .utils.http_utils import get_proxy_headers, ProxyRequestHeaders
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  proxy_router = APIRouter()
10
 
 
2
 
3
  from fastapi import Request, Depends, APIRouter, Query, HTTPException
4
 
5
+ from mediaflow_proxy.handlers import (
6
+ handle_hls_stream_proxy,
7
+ proxy_stream,
8
+ get_manifest,
9
+ get_playlist,
10
+ get_segment,
11
+ get_public_ip,
12
+ )
13
+ from mediaflow_proxy.schemas import (
14
+ MPDSegmentParams,
15
+ MPDPlaylistParams,
16
+ HLSManifestParams,
17
+ ProxyStreamParams,
18
+ MPDManifestParams,
19
+ )
20
+ from mediaflow_proxy.utils.http_utils import get_proxy_headers, ProxyRequestHeaders
21
 
22
  proxy_router = APIRouter()
23
 
mediaflow_proxy/schemas.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  from pydantic import BaseModel, Field, IPvAnyAddress, ConfigDict
2
 
3
 
@@ -55,3 +57,9 @@ class MPDSegmentParams(GenericParams):
55
  mime_type: str = Field(..., description="The MIME type of the segment.")
56
  key_id: str | None = Field(None, description="The DRM key ID (optional).")
57
  key: str | None = Field(None, description="The DRM key (optional).")
 
 
 
 
 
 
 
1
+ from typing import Literal
2
+
3
  from pydantic import BaseModel, Field, IPvAnyAddress, ConfigDict
4
 
5
 
 
57
  mime_type: str = Field(..., description="The MIME type of the segment.")
58
  key_id: str | None = Field(None, description="The DRM key ID (optional).")
59
  key: str | None = Field(None, description="The DRM key (optional).")
60
+
61
+
62
class ExtractorURLParams(GenericParams):
    """Query parameters accepted by the extractor video endpoint."""

    host: Literal["Doodstream", "Mixdrop", "Uqload"] = Field(..., description="The host to extract the URL from.")
    destination: str = Field(..., description="The URL of the stream.", alias="d")
    redirect_stream: bool = Field(False, description="Whether to redirect to the stream endpoint automatically.")
mediaflow_proxy/utils/http_utils.py CHANGED
@@ -125,6 +125,10 @@ class Streamer:
125
  else:
126
  async for chunk in self.response.aiter_bytes():
127
  yield chunk
 
 
 
 
128
  except GeneratorExit:
129
  logger.info("Streaming session stopped by the user")
130
  except Exception as e:
 
125
  else:
126
  async for chunk in self.response.aiter_bytes():
127
  yield chunk
128
+ self.bytes_transferred += len(chunk)
129
+ except httpx.TimeoutException:
130
+ logger.warning(f"Timeout while streaming {url}")
131
+ raise DownloadError(409, f"Timeout while streaming {url}")
132
  except GeneratorExit:
133
  logger.info("Streaming session stopped by the user")
134
  except Exception as e: