// server.js — Anichin Scraper REST API + Scheduler
require("dotenv").config();
const express = require("express");
const cors = require("cors");
const cron = require("node-cron");
const { getDB } = require("./firebase");
const {
  scrapeAnimeList,
  scrapeAnimeDetails,
  scrapeIncrementalUpdates,
  scrapeSearch,
  parseAnimeDetail,
  parseEpisodePage,
  fetchPage,
  extractSlug,
} = require("./scraper");

const app = express();
// HF Spaces wajib PORT 7860
const PORT = process.env.PORT || 7860;
const BASE_URL = process.env.BASE_URL || "https://anichin.cafe";

app.use(cors());
app.use(express.json());
app.use(express.static("public")); // serve frontend

// Firestore caps a WriteBatch at 500 writes — stay safely below it
// so a long series (>500 episodes) no longer fails batch.commit().
const FIRESTORE_BATCH_LIMIT = 450;

// ─── SCRAPE STATE ─────────────────────────────────────────────────────────────
const scrapeState = {
  isRunning: false,          // guards against overlapping scrape jobs
  lastRun: null,             // ISO timestamp of last successful run
  progress: { current: 0, total: 0, stage: "idle" },
  log: [],                   // newest-first, capped at 100 entries
};

/**
 * Prepend a timestamped entry to the in-memory log (newest first,
 * capped at 100 entries) and mirror it to stdout.
 * @param {string} msg - human-readable log line
 */
function addLog(msg) {
  const entry = { time: new Date().toISOString(), msg };
  scrapeState.log.unshift(entry);
  scrapeState.log = scrapeState.log.slice(0, 100); // keep last 100
  console.log(msg);
}

/**
 * Parse an untrusted query/body value as a positive integer.
 * Guards against NaN leaking into Firestore `.limit()` and the
 * progress counters (previous code used bare `parseInt` with no
 * radix and no NaN check).
 * @param {*} value - raw input (string | number | undefined)
 * @param {number} fallback - returned when value is missing/invalid
 * @returns {number} positive integer
 */
function toPositiveInt(value, fallback) {
  const n = Number.parseInt(value, 10); // always pass radix
  return Number.isFinite(n) && n > 0 ? n : fallback;
}

// ─── API ROUTES ───────────────────────────────────────────────────────────────

// Status: liveness + current scrape state.
app.get("/api/status", (req, res) => {
  res.json({
    status: "ok",
    scrapeRunning: scrapeState.isRunning,
    lastRun: scrapeState.lastRun,
    progress: scrapeState.progress,
  });
});

// Scrape logs (newest first, max 100).
app.get("/api/logs", (req, res) => {
  res.json(scrapeState.log);
});

// ── Trigger full scrape ───────────────────────────────────────────────────────
// Responds immediately, then runs the scrape as a detached background task.
app.post("/api/scrape/full", async (req, res) => {
  if (scrapeState.isRunning) {
    return res.status(409).json({ error: "Scrape lagi jalan, tunggu dulu!" });
  }
  // Sanitize: previously `pages` was taken straight from req.body, so a
  // string/NaN/negative value corrupted progress.total and the scrape loop.
  const pages = toPositiveInt(req.body?.pages, 10);
  res.json({ message: `Full scrape dimulai — ${pages} halaman`, ok: true });

  // Jalankan di background (fire-and-forget; errors are logged, not thrown).
  scrapeState.isRunning = true;
  scrapeState.progress = { current: 0, total: pages, stage: "list" };
  (async () => {
    try {
      addLog(`🚀 Full scrape dimulai — ${pages} halaman`);
      const animes = await scrapeAnimeList(`${BASE_URL}/anime/`, pages);
      addLog(`📋 Dapet ${animes.length} anime dari list`);
      scrapeState.progress.stage = "detail";
      scrapeState.progress.total = animes.length;
      await scrapeAnimeDetails(animes);
      addLog(`✅ Full scrape selesai!`);
      scrapeState.lastRun = new Date().toISOString();
    } catch (err) {
      addLog(`❌ Error: ${err.message}`);
    } finally {
      scrapeState.isRunning = false;
      scrapeState.progress.stage = "idle";
    }
  })();
});

// ── Trigger incremental update ────────────────────────────────────────────────
app.post("/api/scrape/update", async (req, res) => {
  if (scrapeState.isRunning) {
    return res.status(409).json({ error: "Scrape lagi jalan!" });
  }
  res.json({ message: "Incremental update dimulai", ok: true });

  scrapeState.isRunning = true;
  scrapeState.progress.stage = "update";
  (async () => {
    try {
      addLog("🔄 Incremental update dimulai");
      await scrapeIncrementalUpdates();
      addLog("✅ Update selesai!");
      scrapeState.lastRun = new Date().toISOString();
    } catch (err) {
      addLog(`❌ Error: ${err.message}`);
    } finally {
      scrapeState.isRunning = false;
      scrapeState.progress.stage = "idle";
    }
  })();
});

// ── Scrape single anime ───────────────────────────────────────────────────────
app.post("/api/scrape/single", async (req, res) => {
  const { url } = req.body ?? {};
  if (!url) return res.status(400).json({ error: "URL diperlukan" });
  try {
    addLog(`🎯 Scrape single: ${url}`);
    const html = await fetchPage(url);
    const detail = parseAnimeDetail(html, url);
    const db = getDB();
    const slug = extractSlug(url);
    const { episodes, ...animeData } = detail;
    await db
      .collection("animes")
      .doc(slug)
      .set(
        { ...animeData, slug, updatedAt: new Date().toISOString() },
        { merge: true }
      );

    // Save episodes in chunks: a single Firestore WriteBatch is limited
    // to 500 writes, so the old "one batch for everything" approach broke
    // on long-running series.
    for (let i = 0; i < episodes.length; i += FIRESTORE_BATCH_LIMIT) {
      const batch = db.batch();
      for (const ep of episodes.slice(i, i + FIRESTORE_BATCH_LIMIT)) {
        const epId = `ep-${String(ep.number || 0).padStart(4, "0")}`;
        batch.set(
          db.collection("animes").doc(slug).collection("episodes").doc(epId),
          ep,
          { merge: true }
        );
      }
      await batch.commit();
    }
    addLog(`✅ ${detail.title} berhasil discrape (${episodes.length} eps)`);
    res.json({ success: true, data: detail });
  } catch (err) {
    addLog(`❌ ${err.message}`);
    res.status(500).json({ error: err.message });
  }
});

// ── Search scrape (live, hits the upstream site) ─────────────────────────────
app.get("/api/scrape/search", async (req, res) => {
  const { q } = req.query;
  if (!q) return res.status(400).json({ error: "Query diperlukan" });
  try {
    const results = await scrapeSearch(q);
    res.json({ query: q, count: results.length, results });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// ─── DATABASE ROUTES (baca data dari Firebase) ────────────────────────────────

// Get semua anime (paginated).
app.get("/api/animes", async (req, res) => {
  try {
    const db = getDB();
    // NOTE(review): `genre` and `sort` are accepted but not yet applied —
    // kept for interface compatibility; TODO implement or drop.
    const { status, genre, type, sort = "title" } = req.query;
    const lim = Math.min(toPositiveInt(req.query.limit, 20), 100);
    const pg = toPositiveInt(req.query.page, 1);

    let query = db.collection("animes").where("hasDetails", "==", true);
    if (status) query = query.where("status", "==", status);
    if (type) query = query.where("type", "==", type);

    // Fetch up to the requested page's end, then slice locally.
    const snapshot = await query.limit(lim * pg).get();
    const all = [];
    snapshot.forEach((doc) => all.push({ id: doc.id, ...doc.data() }));

    // Client-side pagination
    const start = (pg - 1) * lim;
    const data = all.slice(start, start + lim);
    res.json({ total: all.length, page: pg, limit: lim, data });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Get detail anime (document + its ordered episodes subcollection).
app.get("/api/animes/:slug", async (req, res) => {
  try {
    const db = getDB();
    const doc = await db.collection("animes").doc(req.params.slug).get();
    if (!doc.exists) return res.status(404).json({ error: "Anime not found" });

    // Get episodes
    const epsSnap = await db
      .collection("animes")
      .doc(req.params.slug)
      .collection("episodes")
      .orderBy("number", "asc")
      .get();
    const episodes = [];
    epsSnap.forEach((ep) => episodes.push(ep.data()));

    res.json({ id: doc.id, ...doc.data(), episodes });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Search dari Firebase.
app.get("/api/animes/search/local", async (req, res) => {
  try {
    const { q } = req.query;
    if (!q) return res.status(400).json({ error: "Query diperlukan" });
    const db = getDB();
    // Firestore nggak support full-text search native,
    // kita pake range query buat approximate match (prefix search:
    // "\uf8ff" is a very high code point, so [q, q+"\uf8ff"] covers
    // every title starting with q).
    const snapshot = await db
      .collection("animes")
      .orderBy("title")
      .startAt(q)
      .endAt(q + "\uf8ff")
      .limit(20)
      .get();
    const results = [];
    snapshot.forEach((doc) => results.push({ id: doc.id, ...doc.data() }));
    res.json({ query: q, count: results.length, results });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Stats: aggregate counts via Firestore count() queries.
app.get("/api/stats", async (req, res) => {
  try {
    const db = getDB();
    const totalSnap = await db.collection("animes").count().get();
    const ongoingSnap = await db
      .collection("animes")
      .where("status", "in", ["Ongoing", "ongoing", "Airing"])
      .count()
      .get();
    res.json({
      totalAnimes: totalSnap.data().count,
      ongoingAnimes: ongoingSnap.data().count,
      lastScrape: scrapeState.lastRun,
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// ─── SCHEDULER ────────────────────────────────────────────────────────────────
// Cron: update anime ongoing setiap hari jam 06:00 WIB (23:00 UTC).
cron.schedule("0 23 * * *", async () => {
  if (!scrapeState.isRunning) {
    addLog("⏰ Scheduled incremental update jalan...");
    scrapeState.isRunning = true;
    try {
      await scrapeIncrementalUpdates();
      scrapeState.lastRun = new Date().toISOString();
    } catch (err) {
      addLog(`❌ Scheduled error: ${err.message}`);
    } finally {
      scrapeState.isRunning = false;
    }
  }
});

// ─── START SERVER ─────────────────────────────────────────────────────────────
app.listen(PORT, "0.0.0.0", () => {
  console.log(`
╔═══════════════════════════════════════════════╗
║  🍥 Anichin Scraper — Hugging Face Spaces     ║
╠═══════════════════════════════════════════════╣
║  Port    : ${PORT}                            ║
║  Mode    : ${process.env.NODE_ENV || "development"}  ║
║  Firebase: ${process.env.FIREBASE_PROJECT_ID || "NOT SET ⚠️"}  ║
╚═══════════════════════════════════════════════╝
`);
});