tim / server.js
Aqso's picture
Upload server.js
348eb04 verified
// server.js — Anichin Scraper REST API + Scheduler
require("dotenv").config();
const express = require("express");
const cors = require("cors");
const cron = require("node-cron");
const { getDB } = require("./firebase");
const {
scrapeAnimeList,
scrapeAnimeDetails,
scrapeIncrementalUpdates,
scrapeSearch,
parseAnimeDetail,
parseEpisodePage,
fetchPage,
extractSlug,
} = require("./scraper");
// Express application that hosts the REST API and serves the static frontend.
const app = express();
// Hugging Face Spaces requires port 7860, so it is the fallback when PORT is unset.
const PORT = process.env.PORT || 7860;
// Base URL of the site being scraped; overridable through the BASE_URL env var.
const BASE_URL = process.env.BASE_URL || "https://anichin.cafe";
// Middleware is registered in this order: CORS, JSON body parsing, static files.
app.use(cors());
app.use(express.json());
app.use(express.static("public")); // serve the frontend from ./public
// ─── SCRAPE STATE ────────────────────────────────────────────────────────────
/**
 * In-memory bookkeeping for scrape jobs, shared by the routes and the
 * scheduler in this file.
 *
 * - isRunning: true while a background scrape is in flight.
 * - lastRun:   ISO-8601 timestamp written after a run completes, else null.
 * - progress:  counters plus the name of the current stage ("idle" at rest).
 * - log:       most-recent-first list of { time, msg } entries.
 */
const scrapeState = {
  isRunning: false,
  lastRun: null,
  progress: { current: 0, total: 0, stage: "idle" },
  log: [],
};

/**
 * Records a message at the head of the in-memory log, trims the log to its
 * 100 newest entries, and echoes the message to stdout.
 *
 * @param {string} msg - Text to record.
 */
function addLog(msg) {
  const stamped = { time: new Date().toISOString(), msg };
  const { log } = scrapeState;
  log.unshift(stamped);
  // Keep only the 100 newest entries.
  scrapeState.log = log.slice(0, 100);
  console.log(msg);
}
// ─── API ROUTES ──────────────────────────────────────────────────────────────
// Status: liveness flag plus a snapshot of the current scrape state.
app.get("/api/status", (_req, res) => {
  const { isRunning, lastRun, progress } = scrapeState;
  res.json({
    status: "ok",
    scrapeRunning: isRunning,
    lastRun,
    progress,
  });
});
// Scrape logs: returns the in-memory log entries exactly as they are stored.
app.get("/api/logs", (_req, res) => {
  const { log } = scrapeState;
  res.json(log);
});
// ── Trigger full scrape ──────────────────────────────────────────────────────
/**
 * POST /api/scrape/full
 *
 * Starts a full scrape (list pages, then per-anime details) in the background
 * and responds immediately.
 *
 * Body: { pages?: number } - number of list pages to walk (default 10).
 *
 * Responses:
 *   200 { message, ok: true }  scrape accepted and started
 *   400 { error }              `pages` is not a positive integer
 *   409 { error }              another scrape is already running
 */
app.post("/api/scrape/full", (req, res) => {
  if (scrapeState.isRunning) {
    return res.status(409).json({ error: "Scrape lagi jalan, tunggu dulu!" });
  }

  // req.body may be missing when no JSON body was parsed; fall back to an
  // empty object so the default for `pages` still applies.
  const { pages: rawPages = 10 } = req.body || {};
  // Accept numeric strings too, but reject anything that is not a positive
  // integer before it reaches the scraper or the progress counters.
  const pages = typeof rawPages === "string" ? Number(rawPages) : rawPages;
  if (!Number.isInteger(pages) || pages < 1) {
    return res.status(400).json({ error: "pages harus bilangan bulat positif" });
  }

  // Mark the job as running before replying so a second request is rejected.
  scrapeState.isRunning = true;
  scrapeState.progress = { current: 0, total: pages, stage: "list" };
  res.json({ message: `Full scrape dimulai — ${pages} halaman`, ok: true });

  // Run in the background; every failure is caught and logged here, so the
  // promise returned by this IIFE never rejects.
  (async () => {
    try {
      addLog(`🚀 Full scrape dimulai — ${pages} halaman`);
      const animes = await scrapeAnimeList(`${BASE_URL}/anime/`, pages);
      addLog(`📋 Dapet ${animes.length} anime dari list`);
      scrapeState.progress.stage = "detail";
      scrapeState.progress.total = animes.length;
      await scrapeAnimeDetails(animes);
      addLog(`✅ Full scrape selesai!`);
      scrapeState.lastRun = new Date().toISOString();
    } catch (err) {
      addLog(`❌ Error: ${err.message}`);
    } finally {
      // Always release the lock, whether the run succeeded or failed.
      scrapeState.isRunning = false;
      scrapeState.progress.stage = "idle";
    }
  })();
});
// ── Trigger incremental update ───────────────────────────────────────────────
/**
 * POST /api/scrape/update
 *
 * Starts an incremental update in the background and responds immediately.
 *
 * Responses:
 *   200 { message, ok: true }  update accepted and started
 *   409 { error }              another scrape is already running
 */
app.post("/api/scrape/update", (req, res) => {
  if (scrapeState.isRunning) {
    return res.status(409).json({ error: "Scrape lagi jalan!" });
  }
  res.json({ message: "Incremental update dimulai", ok: true });

  scrapeState.isRunning = true;
  // Replace the whole progress object so counters left over from an earlier
  // full scrape are not reported as belonging to this update.
  scrapeState.progress = { current: 0, total: 0, stage: "update" };

  // Run in the background; every failure is caught and logged here, so the
  // promise returned by this IIFE never rejects.
  (async () => {
    try {
      addLog("🔄 Incremental update dimulai");
      await scrapeIncrementalUpdates();
      addLog("✅ Update selesai!");
      scrapeState.lastRun = new Date().toISOString();
    } catch (err) {
      addLog(`❌ Error: ${err.message}`);
    } finally {
      // Always release the lock, whether the run succeeded or failed.
      scrapeState.isRunning = false;
      scrapeState.progress.stage = "idle";
    }
  })();
});
// ── Scrape single anime ──────────────────────────────────────────────────────
/**
 * POST /api/scrape/single
 *
 * Fetches one anime detail page, parses it, and stores the anime document
 * plus its episodes in Firestore.
 *
 * Body: { url: string } - absolute http(s) URL of the anime detail page.
 *
 * Responses:
 *   200 { success: true, data }  parsed detail, as returned by parseAnimeDetail
 *   400 { error }                url missing/invalid, or no slug derivable
 *   500 { error }                fetch, parse or database failure
 */
app.post("/api/scrape/single", async (req, res) => {
  // req.body may be missing when no JSON body was parsed.
  const { url } = req.body || {};
  if (!url) return res.status(400).json({ error: "URL diperlukan" });

  // The URL comes from the client and is fetched server-side, so reject
  // anything that is not an absolute http(s) URL.
  // NOTE(review): the host is not restricted; consider limiting it to the
  // BASE_URL host if this endpoint is reachable by untrusted callers.
  let parsedUrl = null;
  if (typeof url === "string") {
    try {
      parsedUrl = new URL(url);
    } catch (_err) {
      parsedUrl = null;
    }
  }
  if (!parsedUrl || !["http:", "https:"].includes(parsedUrl.protocol)) {
    return res.status(400).json({ error: "URL tidak valid (harus http/https)" });
  }

  try {
    // Firestore rejects empty document ids, so bail out before fetching.
    const slug = extractSlug(url);
    if (!slug) {
      return res.status(400).json({ error: "Slug tidak bisa diambil dari URL" });
    }

    addLog(`🎯 Scrape single: ${url}`);
    const html = await fetchPage(url);
    const detail = parseAnimeDetail(html, url);

    // Episodes live in a sub-collection, so keep them out of the main document.
    const { episodes: parsedEpisodes, ...animeData } = detail;
    const episodes = Array.isArray(parsedEpisodes) ? parsedEpisodes : [];

    const db = getDB();
    const animeRef = db.collection("animes").doc(slug);
    await animeRef.set(
      { ...animeData, slug, updatedAt: new Date().toISOString() },
      { merge: true }
    );

    // Save episodes
    if (episodes.length > 0) {
      const batch = db.batch();
      for (const ep of episodes) {
        // Zero-padded id, e.g. "ep-0007". Episodes without a number all map
        // to "ep-0000" and therefore overwrite one another.
        const epId = `ep-${String(ep.number || 0).padStart(4, "0")}`;
        batch.set(animeRef.collection("episodes").doc(epId), ep, { merge: true });
      }
      await batch.commit();
    }

    addLog(`✅ ${detail.title} berhasil discrape (${episodes.length} eps)`);
    res.json({ success: true, data: detail });
  } catch (err) {
    addLog(`❌ ${err.message}`);
    res.status(500).json({ error: err.message });
  }
});
// ── Search scrape ────────────────────────────────────────────────────────────
/**
 * GET /api/scrape/search?q=...
 *
 * Passes the query to scrapeSearch and returns whatever it finds.
 * Responds 400 when `q` is missing and 500 when the search throws.
 */
app.get("/api/scrape/search", async (req, res) => {
  const keyword = req.query.q;
  if (!keyword) {
    return res.status(400).json({ error: "Query diperlukan" });
  }

  try {
    const results = await scrapeSearch(keyword);
    res.json({ query: keyword, count: results.length, results });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// ─── DATABASE ROUTES (read data from Firebase) ───────────────────────────────
/**
 * Parses a query-string value as a positive base-10 integer.
 *
 * @param {*} value - Raw value taken from req.query.
 * @param {number} fallback - Returned when value is missing, non-numeric or < 1.
 * @returns {number} The parsed integer, or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  const parsed = Number.parseInt(value, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * GET /api/animes - list all animes that have details (paginated).
 *
 * Query parameters:
 *   limit  page size, default 20, capped at 100
 *   page   1-based page number, default 1
 *   status optional equality filter on the `status` field
 *   type   optional equality filter on the `type` field
 *
 * NOTE: `genre` and `sort` query parameters are not applied by this handler.
 *
 * Responds with { total, page, limit, data }. `total` is the number of
 * documents fetched so far (at most limit * page), not the collection size.
 */
app.get("/api/animes", async (req, res) => {
  try {
    const db = getDB();
    const { limit, page, status, type } = req.query;

    // Invalid, zero or negative values fall back to the defaults instead of
    // reaching Firestore as NaN or a negative limit.
    const lim = Math.min(parsePositiveInt(limit, 20), 100);
    const pageNum = parsePositiveInt(page, 1);

    let query = db.collection("animes").where("hasDetails", "==", true);
    if (status) query = query.where("status", "==", status);
    if (type) query = query.where("type", "==", type);

    // Fetch everything up to the end of the requested page, then slice the
    // requested page out in memory.
    const snapshot = await query.limit(lim * pageNum).get();
    const all = [];
    snapshot.forEach((doc) => all.push({ id: doc.id, ...doc.data() }));

    const start = (pageNum - 1) * lim;
    const data = all.slice(start, start + lim);
    res.json({ total: all.length, page: pageNum, limit: lim, data });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
/**
 * GET /api/animes/:slug - one anime document plus its episodes.
 *
 * Responds 404 when the document does not exist. Episodes are read from the
 * `episodes` sub-collection ordered by `number` ascending.
 */
app.get("/api/animes/:slug", async (req, res) => {
  try {
    const animeRef = getDB().collection("animes").doc(req.params.slug);

    const animeSnap = await animeRef.get();
    if (!animeSnap.exists) {
      return res.status(404).json({ error: "Anime not found" });
    }

    // Load the episodes only after the anime is known to exist.
    const episodeSnap = await animeRef
      .collection("episodes")
      .orderBy("number", "asc")
      .get();

    const episodes = [];
    episodeSnap.forEach((epDoc) => {
      episodes.push(epDoc.data());
    });

    res.json({ id: animeSnap.id, ...animeSnap.data(), episodes });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
/**
 * GET /api/animes/search/local?q=... - search the animes stored in Firebase.
 *
 * Responds 400 when `q` is missing; returns at most 20 matches.
 */
app.get("/api/animes/search/local", async (req, res) => {
  try {
    const term = req.query.q;
    if (!term) {
      return res.status(400).json({ error: "Query diperlukan" });
    }

    // Firestore has no native full-text search, so approximate a match with
    // a range query over `title`, from the term up to the term + "\uf8ff".
    const titleRange = getDB()
      .collection("animes")
      .orderBy("title")
      .startAt(term)
      .endAt(term + "\uf8ff")
      .limit(20);
    const snapshot = await titleRange.get();

    const results = [];
    snapshot.forEach((doc) => {
      results.push({ id: doc.id, ...doc.data() });
    });

    res.json({ query: term, count: results.length, results });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
/**
 * GET /api/stats - aggregate counts plus the time of the last scrape.
 *
 * Responds with { totalAnimes, ongoingAnimes, lastScrape }.
 */
app.get("/api/stats", async (req, res) => {
  try {
    const animes = getDB().collection("animes");

    // The two count queries are independent, so run them concurrently.
    const [totalSnap, ongoingSnap] = await Promise.all([
      animes.count().get(),
      animes
        // The stored status spelling varies, so match the known variants.
        .where("status", "in", ["Ongoing", "ongoing", "Airing"])
        .count()
        .get(),
    ]);

    res.json({
      totalAnimes: totalSnap.data().count,
      ongoingAnimes: ongoingSnap.data().count,
      lastScrape: scrapeState.lastRun,
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// ─── SCHEDULER ───────────────────────────────────────────────────────────────
// Cron: incremental update of ongoing anime every day at 06:00 WIB (23:00 UTC).
// The timezone is pinned to UTC so the job fires at the documented time even
// when the host clock uses another zone. The run is skipped silently while
// another scrape is in progress.
cron.schedule(
  "0 23 * * *",
  async () => {
    if (scrapeState.isRunning) return;

    addLog("⏰ Scheduled incremental update jalan...");
    scrapeState.isRunning = true;
    // Report the same stage as the manual update route.
    scrapeState.progress.stage = "update";
    try {
      await scrapeIncrementalUpdates();
      scrapeState.lastRun = new Date().toISOString();
    } catch (err) {
      addLog(`❌ Scheduled error: ${err.message}`);
    } finally {
      // Always release the lock, whether the run succeeded or failed.
      scrapeState.isRunning = false;
      scrapeState.progress.stage = "idle";
    }
  },
  { timezone: "Etc/UTC" }
);
// ─── START SERVER ────────────────────────────────────────────────────────────
// Start listening on all interfaces and print a startup banner.
app.listen(PORT, "0.0.0.0", () => {
  // Number of characters between the two vertical borders.
  const innerWidth = 47;
  const rule = "═".repeat(innerWidth);
  // Pad every row so the right border lines up whatever the value length.
  const row = (text) => `║ ${String(text).padEnd(innerWidth - 2)} ║`;

  // NOTE(review): the title emoji was reconstructed from a mis-encoded byte
  // sequence — confirm it matches the intended glyph.
  const banner = [
    `╔${rule}╗`,
    row("🍥 Anichin Scraper — Hugging Face Spaces"),
    `╠${rule}╣`,
    row(`Port    : ${PORT}`),
    row(`Mode    : ${process.env.NODE_ENV || "development"}`),
    row(`Firebase: ${process.env.FIREBASE_PROJECT_ID || "NOT SET ⚠️"}`),
    `╚${rule}╝`,
  ].join("\n");

  console.log(`\n${banner}\n`);
});