Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -28,7 +28,6 @@ ua = UserAgent(fallback='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/5
|
|
| 28 |
MEDIAFIRE_CACHE = {}
|
| 29 |
CACHE_TTL = 1800
|
| 30 |
|
| 31 |
-
# Persistent Client for connection pooling
|
| 32 |
client = httpx.AsyncClient(
|
| 33 |
timeout=httpx.Timeout(60.0, read=None),
|
| 34 |
follow_redirects=True,
|
|
@@ -85,29 +84,48 @@ async def download_proxy(request: Request, url: str, key: str = None):
|
|
| 85 |
|
| 86 |
if not target_link:
|
| 87 |
try:
|
| 88 |
-
# Scrape the page using headers that mimic a real browser
|
| 89 |
headers = {
|
| 90 |
'User-Agent': ua.random,
|
| 91 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
| 92 |
'Accept-Language': 'en-US,en;q=0.5',
|
| 93 |
-
'
|
| 94 |
-
'Upgrade-Insecure-Requests': '1'
|
| 95 |
}
|
| 96 |
-
async with httpx.AsyncClient(headers=headers, follow_redirects=True) as temp_client:
|
| 97 |
r = await temp_client.get(clean_url)
|
| 98 |
if r.status_code == 200:
|
| 99 |
-
|
|
|
|
| 100 |
if match:
|
| 101 |
-
target_link = match.group(0).
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
if target_link:
|
|
|
|
|
|
|
| 104 |
MEDIAFIRE_CACHE[clean_url] = {'link': target_link, 'time': current_time}
|
| 105 |
-
except
|
|
|
|
| 106 |
|
| 107 |
if target_link:
|
| 108 |
return await stream_file(target_link, range_header, filename, referer=clean_url)
|
| 109 |
else:
|
| 110 |
-
|
|
|
|
| 111 |
|
| 112 |
# --- Google Drive Section ---
|
| 113 |
elif "drive.google.com" in clean_url:
|
|
@@ -140,20 +158,18 @@ async def download_proxy(request: Request, url: str, key: str = None):
|
|
| 140 |
async def stream_file(target_url, range_header, filename, referer=None):
|
| 141 |
headers = {'User-Agent': ua.random}
|
| 142 |
if range_header: headers['Range'] = range_header
|
| 143 |
-
if referer: headers['Referer'] = referer
|
| 144 |
|
| 145 |
try:
|
| 146 |
req = client.build_request("GET", target_url, headers=headers)
|
| 147 |
r = await client.send(req, stream=True)
|
| 148 |
|
| 149 |
-
#
|
| 150 |
if "text/html" in r.headers.get("Content-Type", "").lower() and r.status_code == 200:
|
| 151 |
await r.aclose()
|
| 152 |
-
# Clear the cache
|
| 153 |
for key, val in list(MEDIAFIRE_CACHE.items()):
|
| 154 |
-
if val['link'] == target_url:
|
| 155 |
-
|
| 156 |
-
raise HTTPException(status_code=415, detail="MediaFire detection triggered. Retrying...")
|
| 157 |
|
| 158 |
return await process_response(r, filename)
|
| 159 |
except HTTPException: raise
|
|
|
|
| 28 |
MEDIAFIRE_CACHE = {}
|
| 29 |
CACHE_TTL = 1800
|
| 30 |
|
|
|
|
| 31 |
client = httpx.AsyncClient(
|
| 32 |
timeout=httpx.Timeout(60.0, read=None),
|
| 33 |
follow_redirects=True,
|
|
|
|
| 84 |
|
| 85 |
if not target_link:
|
| 86 |
try:
|
|
|
|
| 87 |
headers = {
|
| 88 |
'User-Agent': ua.random,
|
| 89 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
| 90 |
'Accept-Language': 'en-US,en;q=0.5',
|
| 91 |
+
'Referer': 'https://www.mediafire.com/'
|
|
|
|
| 92 |
}
|
| 93 |
+
async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=20.0) as temp_client:
|
| 94 |
r = await temp_client.get(clean_url)
|
| 95 |
if r.status_code == 200:
|
| 96 |
+
# 1. Regex search (broader pattern)
|
| 97 |
+
match = re.search(r'https?://download[^\s"\']+mediafire\.com/[^\s"\']+', r.text)
|
| 98 |
if match:
|
| 99 |
+
target_link = match.group(0).strip()
|
| 100 |
|
| 101 |
+
# 2. Search again with BeautifulSoup
|
| 102 |
+
if not target_link:
|
| 103 |
+
soup = BeautifulSoup(r.text, 'html.parser')
|
| 104 |
+
# Try every available method to find the download link
|
| 105 |
+
link_tags = soup.find_all('a', href=True)
|
| 106 |
+
for tag in link_tags:
|
| 107 |
+
href = tag['href']
|
| 108 |
+
if 'download' in href and 'mediafire.com' in href:
|
| 109 |
+
target_link = href
|
| 110 |
+
break
|
| 111 |
+
|
| 112 |
+
# Look for a link hidden behind an aria-label
|
| 113 |
+
if not target_link:
|
| 114 |
+
download_btn = soup.find('a', {'aria-label': re.compile(r'Download', re.I)})
|
| 115 |
+
if download_btn: target_link = download_btn.get('href')
|
| 116 |
+
|
| 117 |
if target_link:
|
| 118 |
+
# If the link starts with "//", prepend "https:"
|
| 119 |
+
if target_link.startswith("//"): target_link = f"https:{target_link}"
|
| 120 |
MEDIAFIRE_CACHE[clean_url] = {'link': target_link, 'time': current_time}
|
| 121 |
+
except Exception as e:
|
| 122 |
+
print(f"Scraper Error: {e}")
|
| 123 |
|
| 124 |
if target_link:
|
| 125 |
return await stream_file(target_link, range_header, filename, referer=clean_url)
|
| 126 |
else:
|
| 127 |
+
# A 404 is returned here because the scraper could not find the link
|
| 128 |
+
raise HTTPException(status_code=404, detail="Direct link not found in MediaFire page.")
|
| 129 |
|
| 130 |
# --- Google Drive Section ---
|
| 131 |
elif "drive.google.com" in clean_url:
|
|
|
|
| 158 |
async def stream_file(target_url, range_header, filename, referer=None):
|
| 159 |
headers = {'User-Agent': ua.random}
|
| 160 |
if range_header: headers['Range'] = range_header
|
| 161 |
+
if referer: headers['Referer'] = referer
|
| 162 |
|
| 163 |
try:
|
| 164 |
req = client.build_request("GET", target_url, headers=headers)
|
| 165 |
r = await client.send(req, stream=True)
|
| 166 |
|
| 167 |
+
# If HTML comes back (i.e. we were blocked), clear the cache and return an error
|
| 168 |
if "text/html" in r.headers.get("Content-Type", "").lower() and r.status_code == 200:
|
| 169 |
await r.aclose()
|
|
|
|
| 170 |
for key, val in list(MEDIAFIRE_CACHE.items()):
|
| 171 |
+
if val['link'] == target_url: del MEDIAFIRE_CACHE[key]
|
| 172 |
+
raise HTTPException(status_code=415, detail="MediaFire detection triggered.")
|
|
|
|
| 173 |
|
| 174 |
return await process_response(r, filename)
|
| 175 |
except HTTPException: raise
|