// NOTE: "Spaces: Sleeping" — Hugging Face Spaces status banner captured during
// extraction; preserved here as a comment so the file remains valid TypeScript.
| import { exec } from "child_process"; | |
| import { NextResponse } from "next/server"; | |
| import { promisify } from "util"; | |
// Promise-returning wrapper around child_process.exec.
const execAsync = promisify(exec);

// Whitelist of scraper types accepted in the request body; "all" runs run_all.py.
const VALID_SCRAPERS = ["dosen", "jadwal", "jurusan", "pnp", "all"];
| export async function POST(request: Request) { | |
| try { | |
| const body = await request.json().catch(() => ({})); | |
| const scraperType = body.type || "all"; | |
| if (!VALID_SCRAPERS.includes(scraperType)) { | |
| return NextResponse.json( | |
| { | |
| error: `Invalid scraper type. Must be one of: ${VALID_SCRAPERS.join(", ")}`, | |
| }, | |
| { status: 400 }, | |
| ); | |
| } | |
| // Set working directory to the scraper folder | |
| const baseCommand = "cd scrapping &&"; | |
| let command; | |
| switch (scraperType) { | |
| case "dosen": | |
| command = `${baseCommand} python3 dosen_scrap.py`; | |
| break; | |
| case "jadwal": | |
| command = `${baseCommand} python3 jadwal_scrap.py`; | |
| break; | |
| case "jurusan": | |
| command = `${baseCommand} python3 jurusan_scrap.py`; | |
| break; | |
| case "pnp": | |
| command = `${baseCommand} python3 pnp_scrap.py`; | |
| break; | |
| case "all": | |
| default: | |
| command = `${baseCommand} python3 run_all.py`; | |
| break; | |
| } | |
| console.log(`[Scraping] Starting process: ${command}`); | |
| // Increased timeout to 15 minutes | |
| const { stdout, stderr } = await execAsync(command, { | |
| timeout: 900000, | |
| maxBuffer: 1024 * 1024 * 5, // 5MB buffer | |
| }); | |
| console.log("[Scraping] Process completed:", { | |
| stdout: stdout, | |
| stderr: stderr, | |
| }); | |
| // Only treat as error if stderr contains actual error messages | |
| if (stderr && !stderr.includes("INFO:") && !stderr.includes("DEBUG:")) { | |
| console.error("[Scraping] Error output:", stderr); | |
| return NextResponse.json( | |
| { | |
| success: false, | |
| error: "Scraping process completed with errors", | |
| details: stderr, | |
| }, | |
| { status: 500 }, | |
| ); | |
| } | |
| // Analyze output to determine if files were uploaded or skipped | |
| const analyzeScrapingResults = (output: string) => { | |
| const uploadedCount = (output.match(/✅ Successfully uploaded|uploaded.*to Supabase/gi) || []).length; | |
| const skippedCount = (output.match(/✅ Content identical - SKIPPING upload|Skipped upload.*content unchanged|⏭️ Skipped upload/gi) || []).length; | |
| if (uploadedCount === 0 && skippedCount > 0) { | |
| return `Scraping ${scraperType} completed - no new data found (${skippedCount} files unchanged)`; | |
| } else if (uploadedCount > 0 && skippedCount === 0) { | |
| return `Scraping ${scraperType} completed - ${uploadedCount} new files uploaded`; | |
| } else if (uploadedCount > 0 && skippedCount > 0) { | |
| return `Scraping ${scraperType} completed - ${uploadedCount} uploaded, ${skippedCount} unchanged`; | |
| } else { | |
| return `Scraping ${scraperType} completed successfully`; | |
| } | |
| }; | |
| const message = analyzeScrapingResults(stdout); | |
| return NextResponse.json({ | |
| success: true, | |
| message: message, | |
| output: stdout, | |
| warnings: stderr || null, | |
| finishedAt: new Date().toISOString(), | |
| }); | |
| } catch (error: any) { | |
| console.error("[Scraping] Failed to execute:", error); | |
| let errorMessage = "Scraping process failed"; | |
| if (error.code === "ETIMEDOUT") { | |
| errorMessage = "Scraping process timed out (took too long)"; | |
| } else if (error.killed) { | |
| errorMessage = "Scraping process was terminated"; | |
| } | |
| return NextResponse.json( | |
| { | |
| success: false, | |
| error: errorMessage, | |
| details: error instanceof Error ? error.message : String(error), | |
| }, | |
| { status: 500 }, | |
| ); | |
| } | |
| } | |
| export const dynamic = "force-dynamic"; | |