BowoZZZ committed on
Commit
fae4a06
·
verified ·
1 Parent(s): aa2b989

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +27 -0
  2. main.py +235 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a lightweight Python base image
FROM python:3.9-slim

# Set the working directory
WORKDIR /app

# Copy the requirements file first and install the dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# --- THIS IS THE PART THAT WAS MISSING FROM YOUR FILE ---

# 1. Copy all project files (including main.py) into the container
COPY . .

# 2. Create a new non-root user to comply with the Hugging Face security policy
#    This is required to avoid "Permission Denied" errors
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# 3. Expose port 7860 (the port required by HF Spaces)
EXPOSE 7860

# 4. Main command that starts the server when the Space is launched
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Query
2
+ import httpx
3
+ from bs4 import BeautifulSoup
4
+ import uvicorn
5
+ import os
6
+ from urllib.parse import unquote, urlparse, parse_qs
7
+ from contextlib import asynccontextmanager
8
+ import asyncio
9
+ import re
10
+
11
# Shared async HTTP client. It stays None until lifespan() assigns an
# httpx.AsyncClient at application startup.
client = None
13
+
14
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the shared HTTP client on startup and close it on shutdown.

    The client is stored in the module-level ``client`` variable so the
    scraping helpers can reuse a single connection pool.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    global client
    headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    }
    # Timeout is disabled (None) on purpose so slow connections do not error.
    # NOTE(review): with no timeout a stalled connection can hang a request
    # forever; consider a generous finite httpx.Timeout instead.
    # NOTE(review): verify=False turns off TLS certificate verification and
    # exposes the scraper to man-in-the-middle tampering - confirm it is needed.
    client = httpx.AsyncClient(headers=headers, verify=False, follow_redirects=True, timeout=None)
    try:
        yield
    finally:
        # Always release the connection pool, even when shutdown is triggered
        # by an exception or cancellation thrown into the generator.
        await client.aclose()
25
+
26
# ASGI application; lifespan() manages the shared HTTP client around its run.
app = FastAPI(title="5play.org Aggressive Scraper (Pagination Mode)", lifespan=lifespan)
27
+
28
# Site origin that clean_url() prepends to root-relative links.
BASE_DOMAIN = "https://5play.org"
29
+
30
def clean_url(url: str) -> str:
    """Percent-decode a URL and turn site-relative links into absolute ones.

    Args:
        url: Raw ``href``/``src`` value; may be empty or ``None``.

    Returns:
        ``""`` for a falsy input. Otherwise the decoded URL, where a
        protocol-relative link (``//host/path``) gets an ``https:`` scheme and
        a root-relative link (``/path``) is prefixed with ``BASE_DOMAIN``.
        Anything else is returned decoded but otherwise unchanged.
    """
    if not url:
        return ""
    clean = unquote(url)
    if clean.startswith("//"):
        # Protocol-relative URL: it already names its own host, so only the
        # scheme is missing. Prefixing BASE_DOMAIN here would yield a bogus
        # "https://5play.org//host/..." address.
        return "https:" + clean
    if clean.startswith("/"):
        clean = BASE_DOMAIN + clean
    return clean
37
+
38
async def fetch_until_success(url: str, validator_func) -> BeautifulSoup:
    """Fetch ``url`` repeatedly until ``validator_func`` accepts the parsed page.

    Args:
        url: Absolute URL to request with the shared module-level ``client``.
        validator_func: Callable taking the parsed ``BeautifulSoup`` document
            and returning a truthy value once the page has the expected content.

    Returns:
        The first parsed document for which ``validator_func`` is truthy.

    Note:
        The function retries indefinitely, as before. Failed attempts are now
        logged and followed by a capped exponential backoff, so a failing
        endpoint is not hit in a tight loop.
    """
    import logging  # local import: not part of the file-level imports

    log = logging.getLogger(__name__)
    delay = 0.5
    max_delay = 10.0

    while True:
        try:
            res = await client.get(url)
            soup = BeautifulSoup(res.text, 'html.parser')
            if validator_func(soup):
                return soup
            log.debug("Page at %s did not pass validation; retrying", url)
        except Exception as exc:
            # Best-effort scraping: any failure (network, parsing, validator)
            # leads to another attempt, but it is no longer silent.
            log.warning("Request to %s failed (%r); retrying", url, exc)

        # Back off before the next attempt to avoid a busy retry loop.
        await asyncio.sleep(delay)
        delay = min(delay * 2, max_delay)
50
+
51
async def scan_cdn_page_loop(cdn_url: str) -> str:
    """Resolve an intermediate CDN page to the download link it contains.

    The page is re-fetched (via ``fetch_until_success``) until it has an
    ``a#btn-download-cdn`` anchor with a non-empty ``href``.

    Args:
        cdn_url: URL of the intermediate CDN page.

    Returns:
        The anchor's ``href`` value, or ``""`` if the anchor is absent.
    """
    def is_valid_cdn_page(soup):
        anchor = soup.select_one('a#btn-download-cdn')
        return bool(anchor and anchor.get('href'))

    page = await fetch_until_success(cdn_url, is_valid_cdn_page)
    anchor = page.select_one('a#btn-download-cdn')
    return anchor['href'] if anchor else ""
65
+
66
async def process_item_fully(name, detail_url, image):
    """Resolve one search hit into its final download links.

    Steps:
        1. Fetch the detail page and decide whether it is an app or a news post.
        2. For apps, collect the size and every intermediate CDN link.
        3. Visit each CDN page to obtain the final download link.

    Args:
        name: Display name taken from the search result.
        detail_url: Absolute URL of the item's detail page.
        image: Absolute URL of the item's thumbnail (may be ``""``).

    Returns:
        ``None`` when the page is a news post (``.blogview`` present).
        Otherwise a dict with keys ``name``, ``link``, ``image``, ``download``
        (comma-separated final links) and ``size``.

    Note:
        The whole sequence is retried until it yields at least one link, as
        before. Failures are now logged and retries are spaced out with a
        capped exponential backoff instead of spinning immediately.
    """
    import logging  # local import: not part of the file-level imports

    log = logging.getLogger(__name__)

    def detail_page_valid(s):
        # Accept the page once it is recognisably an app page or a news post.
        is_app = bool(s.select('a.download-line-link'))
        is_news = bool(s.select('.blogview'))
        return is_app or is_news

    delay = 0.5
    max_delay = 10.0

    while True:
        try:
            # 1. Fetch the detail page
            app_soup = await fetch_until_success(detail_url, detail_page_valid)

            # 2. News posts are skipped
            if app_soup.select('.blogview'):
                return None

            # 3. Collect size and CDN links (apps/games only)
            final_data_list = []
            size = "Unknown"

            for btn in app_soup.select('a.download-line-link'):
                cdn_link = btn.get('href')
                if not cdn_link:
                    continue

                if size == "Unknown":
                    # The size is the first parenthesised part of a button label.
                    size_match = re.search(r'\((.*?)\)', btn.get_text(strip=True))
                    if size_match:
                        size = size_match.group(1)

                # 4. Follow the intermediate CDN page to the final link
                direct_link = await scan_cdn_page_loop(clean_url(cdn_link))
                if direct_link:
                    final_data_list.append(direct_link)

            if final_data_list:
                return {
                    "name": name,
                    "link": detail_url,
                    "image": image,
                    "download": ", ".join(final_data_list),
                    "size": size
                }
            log.debug("No download links resolved for %s; retrying", detail_url)
        except Exception as exc:
            # Keep the best-effort retry behaviour, but record the cause.
            log.warning("Processing %s failed (%r); retrying", detail_url, exc)

        # Back off before retrying so a persistent failure is not a hot loop.
        await asyncio.sleep(delay)
        delay = min(delay * 2, max_delay)
125
+
126
@app.get("/")
async def root():
    """Return a short banner describing the API and an example request."""
    banner = dict(
        message="Search API for 5play.org by Bowo",
        example_usage="/search?query=minecraft&limit=5",
    )
    return banner
132
+
133
@app.get("/search")
async def search_apps(
    query: str = Query(..., description="App name"),
    limit: int = Query(5, description="Limit results")
):
    """Search 5play.org and resolve each hit to its download links.

    Result pages are walked through the site's "next" link until at least
    ``limit`` candidates are collected or the listing ends. All candidates are
    then resolved concurrently; news posts are dropped and the list is cut to
    ``limit`` entries.

    Args:
        query: Search text; percent-encoded before being placed in the URL.
        limit: Maximum number of results. Values <= 0 give an empty result
            without any scraping.

    Returns:
        A dict with keys ``success``, ``query``, ``limit``, ``count`` and
        ``results``.
    """
    from urllib.parse import quote  # local import: not in the file-level imports

    def build_response(results):
        return {
            "success": True,
            "query": query,
            "limit": limit,
            "count": len(results),
            "results": results
        }

    def search_page_valid(s):
        # A page is usable once it lists items or states there are none.
        has_items = bool(s.select('.search-item'))
        text_content = s.get_text()
        no_result = "Found 0 responses" in text_content or "no results" in text_content.lower()
        return has_items or no_result

    if limit <= 0:
        # Nothing can be returned, so skip the scraping work entirely.
        return build_response([])

    # Encode the user's text so characters such as '&', '#', '=' or spaces
    # cannot break the URL or inject extra query parameters.
    encoded_query = quote(query, safe="")

    # Argument tuples, not coroutines: coroutines are only created when they
    # are about to be awaited, so none is left un-awaited if an error occurs.
    pending = []

    # Pagination starts at 0, which the site treats as the first page.
    current_search_start = 0

    while True:
        search_url = f"https://5play.org/index.php?story={encoded_query}&lang=en&do=search&subaction=search&titleonly=0&search_start={current_search_start}"

        soup = await fetch_until_success(search_url, search_page_valid)

        # 1. Collect the items on this page. An empty page means either no
        #    results at all or the end of the listing. Item presence is the
        #    only signal used here, so a page with real items is never
        #    discarded because its text happens to contain "no results".
        items = soup.select('.search-item')
        if not items:
            break

        for item in items:
            title_el = item.select_one('a.item-link.title')
            if not title_el:
                continue

            href = title_el.get('href')
            if not href:
                # A title without a link cannot be resolved; skip it.
                continue

            name = title_el.get_text(strip=True)
            detail_link = clean_url(href)

            img_el = item.select_one('.search-item-img img')
            image = clean_url(img_el.get('src', '')) if img_el else ""

            pending.append((name, detail_link, image))

        # Enough candidates collected: do not scrape further pages.
        if len(pending) >= limit:
            break

        # 2. Follow the "next" button, e.g.
        #    onclick="javascript:list_submit(3); return(false)"
        next_link_el = soup.select_one('a#nextlink')
        if not next_link_el:
            break  # last page

        match = re.search(r'list_submit\((\d+)\)', next_link_el.get('onclick', ''))
        if not match:
            break  # button present but page number not parseable

        next_page_num = int(match.group(1))
        if next_page_num <= current_search_start:
            # Page number did not advance; stop to avoid an endless loop.
            break
        current_search_start = next_page_num

    # Resolve every candidate from every visited page concurrently.
    raw_results = []
    if pending:
        raw_results = await asyncio.gather(
            *(process_item_fully(*args) for args in pending)
        )

    # Drop news posts (None) and enforce the requested limit.
    results = [res for res in raw_results if res is not None][:limit]

    return build_response(results)
232
+
233
if __name__ == "__main__":
    # Serve on all interfaces; the port comes from $PORT, defaulting to 7860.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ httpx
4
+ beautifulsoup4