BowoZZZ committed
Commit 42ba18c · verified · 1 Parent(s): b583bff

Upload 3 files

Files changed (3)
  1. Dockerfile +27 -0
  2. main.py +288 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
+ # Use a lightweight Python image
+ FROM python:3.9-slim
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy the requirements file and install dependencies
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # --- THIS IS THE PART THAT WAS MISSING FROM YOUR FILE ---
+
+ # 1. Copy the whole project (including main.py) into the container
+ COPY . .
+
+ # 2. Create a new non-root user to comply with the Hugging Face security policy
+ #    (required, otherwise the Space fails with "Permission Denied" errors)
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # 3. Expose port 7860 (the required port for HF Spaces)
+ EXPOSE 7860
+
+ # 4. Main command that starts the server when the Space is launched
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
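Before pushing to the Space, the image can be smoke-tested locally; the comments above already flag the two HF-specific requirements (non-root user, port 7860). The snippet below is a minimal sketch and not part of this commit: it assumes the image has been built and started with port 7860 published to localhost, and simply checks that the root endpoint of main.py answers.

# Minimal local smoke test (assumes the container is already running and
# port 7860 is published to localhost; both are assumptions, not part of the repo).
import httpx

resp = httpx.get("http://localhost:7860/", timeout=10)
resp.raise_for_status()
print(resp.json())  # expected to echo the "message" and "example_usage" fields from root()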
main.py ADDED
@@ -0,0 +1,288 @@
+ from fastapi import FastAPI, HTTPException, Query
+ import httpx
+ from bs4 import BeautifulSoup
+ import uvicorn
+ import os
+ from urllib.parse import unquote, urlparse, parse_qs
+ from contextlib import asynccontextmanager
+ import asyncio
+ import re
+ import json
+
+ # Setup Async Client
+ client = None
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     global client
+     headers = {
+         "User-Agent": "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
+         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+     }
+     # Timeout is disabled so slow connections or long proxy round-trips do not raise errors
+     client = httpx.AsyncClient(headers=headers, verify=False, follow_redirects=True, timeout=None)
+     yield
+     await client.aclose()
+
+ app = FastAPI(title="GameKillerApp Scraper", lifespan=lifespan)
+
+ BASE_DOMAIN = "https://gamekillerapp.com"
+
+ def unwrap_google_url(url: str) -> str:
+     """Strip the Google Translate wrapper from a URL."""
+     if not url: return ""
+     clean = unquote(url)
+
+     # Decode the URL if it is wrapped in the /website?u=... format
+     if "google" in clean and "/website" in clean and "u=" in clean:
+         try:
+             parsed = urlparse(clean)
+             qs = parse_qs(parsed.query)
+             if 'u' in qs:
+                 return unwrap_google_url(qs['u'][0])
+         except:
+             pass
+
+     # Clean up the translate-proxy domain
+     clean = clean.replace("gamekillerapp-com.translate.goog", "gamekillerapp.com")
+
+     # Remove the Google Translate query parameters
+     clean = clean.split("?_x_tr_")[0]
+     clean = clean.split("&_x_tr_")[0]
+
+     # Handle relative URLs
+     if clean.startswith("/"):
+         clean = BASE_DOMAIN + clean
+
+     return clean
+
+ async def fetch_until_success(url: str, validator_func) -> BeautifulSoup:
+     """
+     Core logic: keep requesting the URL until validator_func returns True.
+     """
+     current_url = url
+
+     while True:
+         try:
+             res = await client.get(current_url)
+
+             # If rate-limited (429) while using the translate proxy, switch to the direct URL
+             if res.status_code == 429 and "translate.goog" in current_url:
+                 current_url = unwrap_google_url(current_url)
+                 continue
+
+             soup = BeautifulSoup(res.text, 'html.parser')
+             # Check that the content is valid
+             if validator_func(soup):
+                 return soup
+
+             # Note: if the status is 200 but validation fails (e.g. the markup changed),
+             # we could break here to avoid an infinite loop, unless retrying forever is intended.
+             # The loop is left running here, so it depends heavily on an accurate validator.
+
+         except Exception:
+             pass
+         # Retry logic is implicit
+
+ async def extract_links_from_nuxt_data(soup: BeautifulSoup) -> list:
+     """
+     Extract download links from the Nuxt JSON script (__NUXT_DATA__).
+     NEW LOGIC: filter on the '/download/' path to tell game files apart from ad videos.
+     """
+     links = []
+     try:
+         # Grab the Nuxt data script
+         script = soup.select_one('script#__NUXT_DATA__')
+         if script:
+             try:
+                 data = json.loads(script.string)
+             except:
+                 data = []
+
+             # Nuxt 3 data is a flat array, so iterate over every item.
+             if isinstance(data, list):
+                 for item in data:
+                     if isinstance(item, str):
+                         # --- STRICT FILTER ---
+                         # 1. Must be an HTTP/HTTPS link
+                         # 2. Must be on the 'gamercdn.top' domain
+                         # 3. MUST contain the '/download/' path (this is the key that keeps .mp4 videos out)
+                         if "http" in item and "gamercdn.top" in item and "/download/" in item:
+                             links.append(item)
+     except Exception as e:
+         print(f"Error extracting Nuxt data: {e}")
+
+     # Remove duplicates and return
+     return list(set(links))
+
+ async def get_final_download_links(download_page_url: str) -> list:
+     """
+     Open the intermediate download page (/download).
+     Pull the links out of the Nuxt JSON data.
+     """
+     # Convert to the proxy URL
+     target_url = download_page_url.replace("https://gamekillerapp.com", "https://gamekillerapp-com.translate.goog")
+     if "?" not in target_url:
+         target_url += "?_x_tr_sl=auto&_x_tr_tl=en&_x_tr_hl=en"
+     else:
+         target_url += "&_x_tr_sl=auto&_x_tr_tl=en&_x_tr_hl=en"
+
+     def is_valid_download_page(soup):
+         # Validation: the page must have the Nuxt data script OR the download box
+         # Based on the HTML you sent, the id is __NUXT_DATA__
+         has_nuxt = bool(soup.select_one('script#__NUXT_DATA__'))
+         # Also check the title or another unique element to make sure the page loaded completely
+         # (your HTML has the .download-btn-box class)
+         has_box = bool(soup.select_one('.download-btn-box'))
+         return has_nuxt or has_box
+
+     soup = await fetch_until_success(target_url, is_valid_download_page)
+
+     # Extract from the Nuxt data using the /download/ path logic
+     final_links = await extract_links_from_nuxt_data(soup)
+
+     return final_links
+
+ async def process_item_fully(name, detail_url, image, initial_size):
+     """
+     Process a single app item:
+     1. Open the detail page (via the proxy).
+     2. Find the 'apk-download-btn' button that leads to the intermediate download page.
+     3. Open the intermediate page -> parse the Nuxt data -> final link.
+     """
+     while True:
+         try:
+             # Convert the detail page URL to the proxy URL
+             target_detail_url = detail_url.replace("https://gamekillerapp.com", "https://gamekillerapp-com.translate.goog")
+             if "?" not in target_detail_url:
+                 target_detail_url += "?_x_tr_sl=auto&_x_tr_tl=en&_x_tr_hl=en"
+             else:
+                 target_detail_url += "&_x_tr_sl=auto&_x_tr_tl=en&_x_tr_hl=en"
+
+             # 1. Fetch the detail page
+             def detail_page_valid(s):
+                 # Validation: the detail page must have a button to the download page (/download)
+                 # Class based on your HTML: apk-download-btn
+                 return bool(s.select('a.apk-download-btn'))
+
+             app_soup = await fetch_until_success(target_detail_url, detail_page_valid)
+
+             # 2. Find the link to the intermediate download page
+             # Selector: a.apk-download-btn
+             download_page_btn = app_soup.select_one('a.apk-download-btn')
+
+             if not download_page_btn:
+                 return None
+
+             intermediate_url = unwrap_google_url(download_page_btn.get('href'))
+
+             # 3. Open the intermediate page and take the final links from the JSON
+             final_data_list = await get_final_download_links(intermediate_url)
+
+             # If the list is empty, return it as-is (the app may be paid, or parsing failed)
+             # Do not continue the loop here, to avoid an infinite loop when the data really is missing
+
+             return {
+                 "name": name,
+                 "link": unwrap_google_url(detail_url),
+                 "image": image,
+                 "download": ", ".join(final_data_list) if final_data_list else "Not Found",
+                 "size": initial_size
+             }
+
+         except Exception:
+             # Only retry on a hard failure (the connection dropped completely);
+             # on a parsing error it is better to break and return None so the request does not hang
+             break
+
+     return None
+
+ @app.get("/")
+ async def root():
+     return {
+         "message": "Search API for gamekillerapp.com (Nuxt /download/ filter)",
+         "example_usage": "/search?query=ultraman&limit=5"
+     }
+
+ @app.get("/search")
+ async def search_apps(
+     query: str = Query(..., description="App name"),
+     limit: int = Query(5, description="Limit results")
+ ):
+     tasks = []
+
+     # Construct the proxy URL
+     search_url = f"https://gamekillerapp-com.translate.goog/search/{query}?_x_tr_sl=auto&_x_tr_tl=en&_x_tr_hl=en"
+
+     # Validator for the search page
+     def search_page_valid(s):
+         # Check for result items or a "no results" message
+         has_items = bool(s.select('.column-games-item'))
+         text_content = s.get_text()
+         no_result = "no results" in text_content.lower() or "nothing found" in text_content.lower()
+         # Also check the title for "Search results" to make sure the page really loaded
+         is_search_page = bool(s.select('.column-title'))
+         return has_items or no_result or is_search_page
+
+     soup = await fetch_until_success(search_url, search_page_valid)
+
+     # 1. Grab the result items
+     items = soup.select('.column-games-item')
+
+     if not items:
+         return {
+             "success": True,
+             "query": query,
+             "limit": limit,
+             "count": 0,
+             "results": []
+         }
+
+     for item in items:
+         # Name
+         title_el = item.select_one('.column-games-item-info-name')
+         if not title_el: continue
+         name = title_el.get_text(strip=True)
+
+         # Detail link
+         detail_link = unwrap_google_url(item.get('href'))
+
+         # Image
+         img_el = item.select_one('.column-games-item-icon')
+         image = ""
+         if img_el:
+             # Check src or data-src (lazy loading)
+             image = unwrap_google_url(img_el.get('src') or img_el.get('data-src') or "")
+
+         # Size & version
+         meta_el = item.select_one('.column-games-item-info-version')
+         size_text = "Unknown"
+         if meta_el:
+             full_text = meta_el.get_text(strip=True)
+             if "+" in full_text:
+                 parts = full_text.split("+")
+                 if len(parts) > 1:
+                     size_text = parts[1].strip()
+             else:
+                 size_text = full_text
+
+         tasks.append(process_item_fully(name, detail_link, image, size_text))
+
+         if len(tasks) >= limit:
+             break
+
+     # Run the tasks
+     raw_results = await asyncio.gather(*tasks)
+     results = [res for res in raw_results if res is not None]
+
+     return {
+         "success": True,
+         "query": query,
+         "limit": limit,
+         "count": len(results),
+         "results": results
+     }
+
+ if __name__ == "__main__":
+     port = int(os.environ.get("PORT", 7860))
+     uvicorn.run(app, host="0.0.0.0", port=port)
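Once the server is up (via the Dockerfile above, or plain uvicorn), the /search endpoint can be exercised with a short async client. This is an illustrative sketch only; the base URL, query value, and limit are assumptions taken from the example_usage string returned by root(), not part of the committed code.

# Hypothetical client for /search (assumes the API is reachable at http://localhost:7860).
import asyncio
import httpx

async def demo():
    async with httpx.AsyncClient(timeout=None) as client:
        resp = await client.get(
            "http://localhost:7860/search",
            params={"query": "ultraman", "limit": 2},
        )
        data = resp.json()
        print("count:", data["count"])
        for result in data["results"]:
            # each result carries the name, link, image, download and size keys built in process_item_fully()
            print(result["name"], "->", result["download"])

asyncio.run(demo())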
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ fastapi
+ uvicorn
+ httpx
+ beautifulsoup4