Spaces:

Aqso
/

tim

Paused

App Files Files Community

tim / server.js

Aqso's picture

Upload server.js

348eb04 verified 24 days ago

history blame contribute delete

10.3 kB

	// server.js — Anichin Scraper REST API + Scheduler

	require("dotenv").config();
	const express = require("express");
	const cors = require("cors");
	const cron = require("node-cron");
	const { getDB } = require("./firebase");
	const {
	scrapeAnimeList,
	scrapeAnimeDetails,
	scrapeIncrementalUpdates,
	scrapeSearch,
	parseAnimeDetail,
	parseEpisodePage,
	fetchPage,
	extractSlug,
	} = require("./scraper");

	// Express application shared by every route and by the server start-up below.
	const app = express();
	// Hugging Face Spaces requires the server to listen on port 7860.
	const PORT = process.env.PORT || 7860;
	// Root URL of the site being scraped; can be overridden through the environment.
	const BASE_URL = process.env.BASE_URL || "https://anichin.cafe";

	app.use(cors()); // CORS middleware with its default options
	app.use(express.json()); // parse JSON request bodies into req.body
	app.use(express.static("public")); // serve frontend

	// ─── SCRAPE STATE ─────────────────────────────────────────────────────────────

	// In-memory state of the scraper, shared by the routes and the scheduler in
	// this file. It is a plain object, so nothing here survives a restart.
	const scrapeState = {
	// true while a scrape is in progress; the trigger routes answer 409 and the
	// scheduler skips its run while this is set.
	isRunning: false,
	// ISO-8601 timestamp of the last scrape that finished without throwing.
	lastRun: null,
	// `stage` takes the values "idle", "list", "detail" and "update" in this
	// file; `current` starts at 0 and is not advanced by any code visible here.
	progress: { current: 0, total: 0, stage: "idle" },
	// Log entries ({ time, msg }), newest first; addLog keeps at most 100.
	log: [],
	};

	/**
	 * Record a message in the in-memory scrape log and echo it to stdout.
	 *
	 * The entry is placed at the front of the log (newest first) and the log is
	 * then trimmed to its 100 most recent entries.
	 *
	 * @param {string} msg - Text to record.
	 */
	function addLog(msg) {
	  const { log } = scrapeState;
	  log.unshift({ time: new Date().toISOString(), msg });
	  scrapeState.log = log.slice(0, 100); // keep last 100
	  console.log(msg);
	}

	// ─── API ROUTES ───────────────────────────────────────────────────────────────

	// Status
	// GET /api/status — reports that the service is up together with the
	// current scrape flag, last-run timestamp and progress object.
	app.get("/api/status", (req, res) => {
	  const { isRunning, lastRun, progress } = scrapeState;
	  const report = { status: "ok", scrapeRunning: isRunning, lastRun, progress };
	  res.json(report);
	});

	// Scrape logs
	// GET /api/logs — returns the in-memory scrape log as a JSON array.
	app.get("/api/logs", (req, res) => {
	  const { log } = scrapeState;
	  res.json(log);
	});

	// ── Trigger full scrape ───────────────────────────────────────────────────────
	// POST /api/scrape/full — body: { pages?: number } (defaults to 10).
	// Answers 409 while another scrape is running and 400 when `pages` is not a
	// positive integer. Otherwise it replies immediately and runs the scrape in
	// the background; progress is exposed through scrapeState.
	app.post("/api/scrape/full", async (req, res) => {
	  if (scrapeState.isRunning) {
	    return res.status(409).json({ error: "Scrape lagi jalan, tunggu dulu!" });
	  }

	  // req.body may be undefined when no JSON body was parsed.
	  const { pages: rawPages = 10 } = req.body || {};
	  // Coerce so that a numeric string such as "5" is accepted, then validate.
	  const pages = Number(rawPages);
	  if (!Number.isInteger(pages) || pages < 1) {
	    return res
	      .status(400)
	      .json({ error: "pages harus berupa bilangan bulat positif" });
	  }

	  res.json({ message: `Full scrape dimulai — ${pages} halaman`, ok: true });

	  // Run in the background
	  scrapeState.isRunning = true;
	  scrapeState.progress = { current: 0, total: pages, stage: "list" };

	  // Deliberately not awaited: every failure is caught and logged inside.
	  (async () => {
	    try {
	      addLog(`🚀 Full scrape dimulai — ${pages} halaman`);
	      const animes = await scrapeAnimeList(`${BASE_URL}/anime/`, pages);
	      addLog(`📋 Dapet ${animes.length} anime dari list`);

	      // Second phase: the total now counts anime instead of list pages.
	      scrapeState.progress.stage = "detail";
	      scrapeState.progress.total = animes.length;

	      await scrapeAnimeDetails(animes);
	      addLog(`✅ Full scrape selesai!`);
	      scrapeState.lastRun = new Date().toISOString();
	    } catch (err) {
	      addLog(`❌ Error: ${err.message}`);
	    } finally {
	      // Always release the flag so later scrapes are not blocked.
	      scrapeState.isRunning = false;
	      scrapeState.progress.stage = "idle";
	    }
	  })();
	});

	// ── Trigger incremental update ────────────────────────────────────────────────
	// POST /api/scrape/update — answers 409 while a scrape is running; otherwise
	// acknowledges the request and runs scrapeIncrementalUpdates() in the
	// background.
	app.post("/api/scrape/update", async (req, res) => {
	  const alreadyBusy = scrapeState.isRunning;
	  if (alreadyBusy) {
	    return res.status(409).json({ error: "Scrape lagi jalan!" });
	  }

	  res.json({ message: "Incremental update dimulai", ok: true });

	  scrapeState.isRunning = true;
	  scrapeState.progress.stage = "update";

	  // Background job: logs its own outcome and always releases the flag.
	  const runUpdate = async () => {
	    try {
	      addLog("🔄 Incremental update dimulai");
	      await scrapeIncrementalUpdates();
	      addLog("✅ Update selesai!");
	      scrapeState.lastRun = new Date().toISOString();
	    } catch (err) {
	      addLog(`❌ Error: ${err.message}`);
	    } finally {
	      scrapeState.isRunning = false;
	      scrapeState.progress.stage = "idle";
	    }
	  };

	  // Started without awaiting so the handler returns right after replying.
	  runUpdate();
	});

	// ── Scrape single anime ───────────────────────────────────────────────────────
	// POST /api/scrape/single — body: { url: string }.
	// Fetches one page, parses it, upserts the anime document and its episodes
	// sub-collection, and returns the parsed detail. Answers 400 for a missing
	// or non-http(s) URL and 500 when fetching, parsing or saving fails.
	app.post("/api/scrape/single", async (req, res) => {
	  // req.body may be undefined when no JSON body was parsed.
	  const { url } = req.body || {};
	  if (!url) return res.status(400).json({ error: "URL diperlukan" });

	  // NOTE(review): `url` is caller-supplied and fetched server-side, which is
	  // an SSRF surface. Only the scheme is checked here; consider restricting
	  // the host to BASE_URL's host if arbitrary targets are not intended.
	  let protocol;
	  try {
	    // BASE_URL is only a base so relative inputs still parse; the value
	    // handed to fetchPage below is the caller's original `url`.
	    ({ protocol } = new URL(url, BASE_URL));
	  } catch (err) {
	    return res.status(400).json({ error: "URL tidak valid" });
	  }
	  if (protocol !== "http:" && protocol !== "https:") {
	    return res.status(400).json({ error: "URL tidak valid" });
	  }

	  try {
	    addLog(`🎯 Scrape single: ${url}`);
	    const html = await fetchPage(url);
	    const detail = parseAnimeDetail(html, url);
	    const db = getDB();
	    const slug = extractSlug(url);

	    // Episodes are stored separately from the anime document; treat a
	    // missing or non-array `episodes` as "no episodes" instead of crashing.
	    const { episodes: parsedEpisodes, ...animeData } = detail;
	    const episodes = Array.isArray(parsedEpisodes) ? parsedEpisodes : [];

	    const animeRef = db.collection("animes").doc(slug);
	    await animeRef.set(
	      { ...animeData, slug, updatedAt: new Date().toISOString() },
	      { merge: true }
	    );

	    // Save episodes
	    if (episodes.length > 0) {
	      const batch = db.batch();
	      episodes.forEach((ep) => {
	        // Zero-padded id, e.g. "ep-0007"; a missing number maps to "ep-0000".
	        const epId = `ep-${String(ep.number || 0).padStart(4, "0")}`;
	        batch.set(animeRef.collection("episodes").doc(epId), ep, { merge: true });
	      });
	      await batch.commit();
	    }

	    addLog(`✅ ${detail.title} berhasil discrape (${episodes.length} eps)`);
	    res.json({ success: true, data: detail });
	  } catch (err) {
	    addLog(`❌ ${err.message}`);
	    res.status(500).json({ error: err.message });
	  }
	});

	// ── Search scrape ─────────────────────────────────────────────────────────────
	// GET /api/scrape/search?q=… — passes the query to scrapeSearch() and
	// returns its results with a count. 400 without `q`, 500 on failure.
	app.get("/api/scrape/search", async (req, res) => {
	  const keyword = req.query.q;
	  if (!keyword) {
	    return res.status(400).json({ error: "Query diperlukan" });
	  }

	  try {
	    const results = await scrapeSearch(keyword);
	    const payload = { query: keyword, count: results.length, results };
	    res.json(payload);
	  } catch (err) {
	    res.status(500).json({ error: err.message });
	  }
	});

	// ─── DATABASE ROUTES (read data from Firebase) ────────────────────────────────

	// Get all anime (paginated)
	// GET /api/animes?limit=&page=&status=&type=
	// Lists anime documents that have `hasDetails == true`, optionally filtered
	// by `status` and `type`. `limit` defaults to 20 and is capped at 100;
	// `page` is 1-based. Non-numeric or non-positive values fall back to the
	// defaults instead of reaching Firestore as NaN or negative numbers.
	// NOTE: `genre` and `sort` query parameters are not applied to the query.
	app.get("/api/animes", async (req, res) => {
	  try {
	    const db = getDB();
	    const { limit = 20, page = 1, status, type } = req.query;

	    const parsedLimit = Number.parseInt(limit, 10);
	    const parsedPage = Number.parseInt(page, 10);
	    // NaN fails the `>= 1` comparison, so it takes the default as well.
	    const lim = Math.min(parsedLimit >= 1 ? parsedLimit : 20, 100);
	    const pageNum = parsedPage >= 1 ? parsedPage : 1;

	    let query = db.collection("animes").where("hasDetails", "==", true);

	    if (status) query = query.where("status", "==", status);
	    if (type) query = query.where("type", "==", type);

	    // Read everything up to the end of the requested page, then slice the
	    // page out in memory.
	    const snapshot = await query.limit(lim * pageNum).get();
	    const all = [];
	    snapshot.forEach((doc) => all.push({ id: doc.id, ...doc.data() }));

	    const start = (pageNum - 1) * lim;
	    const data = all.slice(start, start + lim);

	    // `total` is the number of documents read, not the collection size.
	    res.json({ total: all.length, page: pageNum, limit: lim, data });
	  } catch (err) {
	    res.status(500).json({ error: err.message });
	  }
	});

	// Get anime detail
	// GET /api/animes/:slug — returns the anime document merged with its
	// episodes (ordered by `number`, ascending). 404 when the document is absent.
	app.get("/api/animes/:slug", async (req, res) => {
	  try {
	    const animeRef = getDB().collection("animes").doc(req.params.slug);
	    const animeDoc = await animeRef.get();
	    if (!animeDoc.exists) {
	      return res.status(404).json({ error: "Anime not found" });
	    }

	    // Episodes live in a sub-collection of the anime document.
	    const episodeQuery = animeRef.collection("episodes").orderBy("number", "asc");
	    const epsSnap = await episodeQuery.get();
	    const episodes = epsSnap.docs.map((ep) => ep.data());

	    res.json({ id: animeDoc.id, ...animeDoc.data(), episodes });
	  } catch (err) {
	    res.status(500).json({ error: err.message });
	  }
	});

	// Search from Firebase
	// GET /api/animes/search/local?q=… — title lookup against the stored anime
	// documents, limited to 20 results. 400 without `q`.
	app.get("/api/animes/search/local", async (req, res) => {
	  try {
	    const term = req.query.q;
	    if (!term) {
	      return res.status(400).json({ error: "Query diperlukan" });
	    }

	    // Firestore has no native full-text search, so a range query on
	    // `title` from `term` to `term + "\uf8ff"` approximates a match.
	    const titleRange = getDB()
	      .collection("animes")
	      .orderBy("title")
	      .startAt(term)
	      .endAt(`${term}\uf8ff`)
	      .limit(20);
	    const snapshot = await titleRange.get();

	    const results = snapshot.docs.map((doc) => ({ id: doc.id, ...doc.data() }));
	    res.json({ query: term, count: results.length, results });
	  } catch (err) {
	    res.status(500).json({ error: err.message });
	  }
	});

	// Stats
	// GET /api/stats — total number of anime documents, how many have a status
	// of "Ongoing", "ongoing" or "Airing", and the last scrape timestamp.
	app.get("/api/stats", async (req, res) => {
	  try {
	    const animes = getDB().collection("animes");
	    const ongoingStatuses = ["Ongoing", "ongoing", "Airing"];

	    const totalSnap = await animes.count().get();
	    const ongoingSnap = await animes
	      .where("status", "in", ongoingStatuses)
	      .count()
	      .get();

	    const { count: totalAnimes } = totalSnap.data();
	    const { count: ongoingAnimes } = ongoingSnap.data();

	    res.json({ totalAnimes, ongoingAnimes, lastScrape: scrapeState.lastRun });
	  } catch (err) {
	    res.status(500).json({ error: err.message });
	  }
	});

	// ─── SCHEDULER ────────────────────────────────────────────────────────────────
	// Cron: refresh ongoing anime every day at 06:00 WIB (23:00 UTC).
	// The timezone is pinned to UTC so the run time matches the expression no
	// matter what the host's local time zone is.
	cron.schedule(
	  "0 23 * * *",
	  async () => {
	    // Skip this run when a manually triggered scrape is still in progress.
	    if (scrapeState.isRunning) return;

	    addLog("⏰ Scheduled incremental update jalan...");
	    scrapeState.isRunning = true;
	    // Report the same stage as the manual update route does.
	    scrapeState.progress.stage = "update";
	    try {
	      await scrapeIncrementalUpdates();
	      addLog("✅ Scheduled update selesai!");
	      scrapeState.lastRun = new Date().toISOString();
	    } catch (err) {
	      addLog(`❌ Scheduled error: ${err.message}`);
	    } finally {
	      // Always release the flag so later scrapes are not blocked.
	      scrapeState.isRunning = false;
	      scrapeState.progress.stage = "idle";
	    }
	  },
	  { timezone: "Etc/UTC" }
	);

	// ─── START SERVER ─────────────────────────────────────────────────────────────
	// Bind on all interfaces and print a start-up banner showing the port, the
	// NODE_ENV mode and the configured Firebase project id.
	app.listen(PORT, "0.0.0.0", () => {
	console.log(`
	╔═══════════════════════════════════════════════╗
	║ 🍥 Anichin Scraper — Hugging Face Spaces ║
	╠═══════════════════════════════════════════════╣
	║ Port : ${PORT} ║
	║ Mode : ${process.env.NODE_ENV || "development"} ║
	║ Firebase: ${process.env.FIREBASE_PROJECT_ID || "NOT SET ⚠️"} ║
	╚═══════════════════════════════════════════════╝
	`);
	});