import asyncio
import os
import re
from contextlib import asynccontextmanager
from urllib.parse import parse_qs, quote_plus, unquote, urlparse

import httpx
import uvicorn
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException, Query
|
|
| |
# Shared httpx.AsyncClient, created on startup by `lifespan` and reused by
# every request handler (module-level singleton).
client = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the shared HTTP client on startup; close it on shutdown."""
    global client
    # Browser-like headers so the scraped site serves its normal HTML.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    }
    # NOTE(review): verify=False disables TLS certificate validation and
    # timeout=None lets a request hang forever. Both look deliberate for
    # scraping through the translate proxy, but confirm before production use.
    client = httpx.AsyncClient(headers=headers, verify=False, follow_redirects=True, timeout=None)
    yield
    await client.aclose()
|
|
# FastAPI application; the lifespan handler above manages the shared client.
app = FastAPI(title="Tendoku Scraper", lifespan=lifespan)

# Canonical (non-proxied) domain, used to absolutize relative links.
BASE_DOMAIN = "https://www.tendoku.com"
|
|
def unwrap_google_url(url: str) -> str:
    """Strip Google Translate proxy wrapping from *url*.

    Handles three proxy artifacts:
      1. ``translate.google.*/...?u=<real-url>`` redirect wrappers
         (unwrapped recursively, since wrappers can nest);
      2. the ``www-tendoku-com.translate.goog`` proxied hostname;
      3. trailing ``_x_tr_*`` translate query parameters.

    Relative paths are absolutized against BASE_DOMAIN. Returns "" for a
    falsy input.
    """
    if not url:
        return ""
    clean = unquote(url)

    # Case 1: Google redirect wrapper carrying the real URL in the `u` param.
    if "google" in clean and "/website" in clean and "u=" in clean:
        # FIX: was a bare `except:` which also swallowed SystemExit /
        # KeyboardInterrupt; narrowed to Exception (best-effort unwrap).
        try:
            parsed = urlparse(clean)
            qs = parse_qs(parsed.query)
            if 'u' in qs:
                return unwrap_google_url(qs['u'][0])
        except Exception:
            pass

    # Case 2: proxied hostname back to the real one.
    clean = clean.replace("www-tendoku-com.translate.goog", "www.tendoku.com")

    # Case 3: drop translate query params whether they start or continue the query.
    clean = clean.split("?_x_tr_")[0]
    clean = clean.split("&_x_tr_")[0]

    # Absolutize site-relative paths.
    if clean.startswith("/"):
        clean = BASE_DOMAIN + clean
    return clean
|
|
def format_bytes(size):
    """Convert a byte count into a human-readable string (B, KB, MB, GB, TB).

    Returns "Unknown" for values that cannot be interpreted as a number.

    Fixes over the previous version:
      * exact powers of 1024 now roll over (1024 -> "1.00 KB", not "1024.00 ");
      * byte-range values get a "B" unit instead of a trailing space;
      * sizes beyond TB saturate at TB instead of hitting a KeyError;
      * bare `except:` narrowed to the conversion errors that can occur.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    try:
        size = float(size)
        n = 0
        # >= so an exact 1024 advances to the next unit; cap at the last label.
        while size >= 1024 and n < len(units) - 1:
            size /= 1024
            n += 1
        return f"{size:.2f} {units[n]}"
    except (TypeError, ValueError):
        # Non-numeric input (None, arbitrary strings, ...).
        return "Unknown"
|
|
async def get_remote_file_size(url: str) -> str:
    """Return the remote file's size as a readable string, without downloading it.

    (Feature kept but currently unused for Tendoku, per the original request.)

    Sends a ranged GET (``bytes=0-0``) and reads the total from the
    Content-Range header, falling back to Content-Length. Returns "Unknown"
    on any failure.
    """
    try:
        req = client.build_request("GET", url, headers={"Range": "bytes=0-0"})
        r = await client.send(req, stream=True)
        # FIX: close the streamed response even if header parsing raises;
        # previously an exception between send() and aclose() leaked the
        # connection.
        try:
            total_size = 0
            content_range = r.headers.get("Content-Range")
            if content_range and "/" in content_range:
                # Format: "bytes 0-0/<total>"; the total may be "*" (unknown).
                tail = content_range.rsplit("/", 1)[-1]
                if tail.isdigit():
                    total_size = int(tail)
            if total_size == 0:
                content_length = r.headers.get("Content-Length")
                if content_length and content_length.isdigit():
                    total_size = int(content_length)
        finally:
            await r.aclose()
        if total_size > 0:
            return format_bytes(total_size)
        return "Unknown"
    except Exception:
        # Best-effort helper: network errors degrade to "Unknown".
        return "Unknown"
|
|
async def fetch_until_success(url: str, validator_func, max_attempts=None) -> BeautifulSoup:
    """Fetch *url* repeatedly until ``validator_func(soup)`` is truthy.

    If the proxy answers 429 (Too Many Requests) on a translate.goog URL,
    switch to the direct (unwrapped) URL and keep going.

    Args:
        url: Initial URL to fetch.
        validator_func: Predicate receiving a BeautifulSoup; truthy = done.
        max_attempts: Optional cap on fetch attempts. ``None`` (the default)
            retries forever, matching the original behavior.

    Returns:
        The validated soup, or an empty soup if ``max_attempts`` is exhausted.
    """
    current_url = url
    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        try:
            res = await client.get(current_url)

            # Rate-limited on the proxy: fall back to the direct URL.
            if res.status_code == 429 and "translate.goog" in current_url:
                current_url = unwrap_google_url(current_url)
                continue

            soup = BeautifulSoup(res.text, 'html.parser')
            if validator_func(soup):
                return soup
        except Exception:
            # Network hiccup: fall through to the backoff below and retry.
            pass

        # FIX: the original retried in a tight loop with no delay, hammering
        # the remote site on persistent failures. Back off briefly instead.
        await asyncio.sleep(0.5)

    return BeautifulSoup("", "html.parser")
|
|
async def process_item_fully(name, detail_url, image):
    """Scrape one app's detail page and assemble its result record.

    Steps:
      1. Fetch the detail page through the Google Translate proxy.
      2. Parse the size from "Size : ..." text in <li> elements, falling
         back to <p> elements.
      3. Collect download links from anchors with class 'download_disini'.

    Returns:
        A dict with name/link/image/download/size, or ``None`` when the
        detail page is empty or exposes no download links.

    Retries indefinitely on unexpected errors, with a short delay between
    attempts.
    """
    # Route the detail request through the translate proxy.
    proxied_url = detail_url.replace(
        "https://www.tendoku.com", "https://www-tendoku-com.translate.goog"
    )
    separator = "&" if "?" in proxied_url else "?"
    proxied_url += separator + "_x_tr_sl=auto&_x_tr_tl=en&_x_tr_hl=en"

    def detail_page_valid(s):
        # A usable detail page has an article.post or an entry-content area.
        return bool(s.select('article.post')) or bool(s.select('.entry-content'))

    while True:
        try:
            app_soup = await fetch_until_success(proxied_url, detail_page_valid)
            if not app_soup.text:
                return None

            # --- Size parsing: <li> first, then <p> as a fallback ---------
            final_size_list = []
            content_area = app_soup.select_one('.entry-content')
            if content_area:
                for tag_name in ('li', 'p'):
                    for element in content_area.select(tag_name):
                        text = element.get_text(strip=True)
                        if "Size" in text and ":" in text:
                            final_size_list.append(text.split(":", 1)[1].strip())
                    if final_size_list:
                        # <li> hits found; skip the <p> fallback.
                        break
            size_result = ", ".join(final_size_list) if final_size_list else "Unknown"

            # --- Download links -------------------------------------------
            final_link_list = []
            for btn in app_soup.select('a.download_disini'):
                raw_link = btn.get('href')
                if raw_link:
                    final_link_list.append(unwrap_google_url(raw_link))
            if not final_link_list:
                # No download buttons: treat the item as unusable.
                return None

            return {
                "name": name,
                "link": unwrap_google_url(detail_url),
                "image": image,
                "download": ", ".join(final_link_list),
                "size": size_result,
            }
        except Exception:
            # FIX: the original retried immediately in a tight loop on any
            # error; back off briefly so persistent failures don't spin hot.
            await asyncio.sleep(1)
            continue
|
|
@app.get("/")
async def root():
    """Landing endpoint: basic service info and a usage example."""
    info = {
        "message": "Search API for Tendoku.com by Bowo",
        "github": "https://github.com/SaptaZ",
        "example_usage": "/search?query=minecraft&limit=5",
    }
    return info
|
|
@app.get("/search")
async def search_apps(
    query: str = Query(..., description="App name"),
    limit: int = Query(5, description="Limit results")
):
    """Search tendoku.com (through the translate proxy) and scrape results.

    Walks the paginated search results page by page, scrapes every hit's
    detail page concurrently, and stops as soon as ``limit`` valid results
    are collected (or pagination runs out).

    Returns a dict: success / query / limit / count / results.
    """
    collected_results = []
    current_page = 1

    # FIX: the raw query must be URL-encoded — spaces, '&' or '#' in the
    # search term previously produced a malformed request URL.
    encoded_query = quote_plus(query)
    proxy_params = "_x_tr_sl=auto&_x_tr_tl=en&_x_tr_hl=en"

    def search_page_valid(s):
        # A search page is valid when it has results OR explicitly reports none.
        has_items = bool(s.select('article.post'))
        text_content = s.get_text()
        if not text_content:
            return False
        no_result = "Nothing Found" in text_content or "nothing matched" in text_content
        return has_items or no_result

    while True:
        # WordPress pagination: /page/N/ for pages after the first.
        if current_page == 1:
            search_url = f"https://www-tendoku-com.translate.goog/?s={encoded_query}&{proxy_params}"
        else:
            search_url = f"https://www-tendoku-com.translate.goog/page/{current_page}/?s={encoded_query}&{proxy_params}"

        soup = await fetch_until_success(search_url, search_page_valid)

        # Empty soup means the fetch gave up entirely.
        if not soup.text:
            break

        # Explicit "no results" on the very first page: short-circuit.
        text_content = soup.get_text()
        if ("Nothing Found" in text_content or "nothing matched" in text_content) and current_page == 1:
            return {
                "success": True,
                "query": query,
                "limit": limit,
                "count": 0,
                "results": []
            }

        items = soup.select('article.post')
        if not items:
            break

        # Build one detail-scrape task per result on this page.
        page_tasks = []
        for item in items:
            title_el = item.select_one('.entry-title a')
            if not title_el:
                continue
            name = title_el.get_text(strip=True)

            # FIX: use .get() — an anchor without href previously raised KeyError.
            href = title_el.get('href')
            if not href:
                continue
            detail_link = unwrap_google_url(href)

            img_el = item.select_one('.content-thumbnail img')
            # .get() likewise guards against an <img> with no src attribute.
            image = unwrap_google_url(img_el.get('src')) if img_el else ""

            page_tasks.append(process_item_fully(name, detail_link, image))

        if not page_tasks:
            break

        # Scrape all detail pages of this search page concurrently.
        page_results = await asyncio.gather(*page_tasks)
        collected_results.extend(res for res in page_results if res is not None)

        if len(collected_results) >= limit:
            break

        # Pagination: standard WP "next" link, with a text-match fallback.
        next_link_el = soup.select_one('a.next.page-numbers')
        if not next_link_el:
            next_link_el = soup.find('a', string=re.compile(r'Next|Older', re.I))

        if next_link_el:
            current_page += 1
        else:
            break

    results = collected_results[:limit]

    return {
        "success": True,
        "query": query,
        "limit": limit,
        "count": len(results),
        "results": results
    }
|
|
if __name__ == "__main__":
    # Honor the PORT environment variable; default to 7860 otherwise.
    server_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=server_port)