Spaces:

WokoVN
/

LT-ML

Paused

App Files Files Community

LT-ML / backend /controllers /search.controller.js

WokoVN's picture

Upload 75 files

bb57163 verified 12 days ago

history blame contribute delete

9.98 kB

	import braveService from '../services/brave.service.js';
	import scrapeService from '../services/scrape.service.js';
	import geminiService from '../services/gemini.service.js';
	import pLimit from 'p-limit';

	// Scrape budget shared by the handlers in this file: a request scrapes at
	// most MAX_SCRAPE_URLS pages, and at most SCRAPE_CONCURRENCY scrapes run at
	// the same time (axios is fast; the Puppeteer fallback is slow).
	const MAX_SCRAPE_URLS = 5;
	const SCRAPE_CONCURRENCY = 3;
	const scrapeLimit = pLimit(SCRAPE_CONCURRENCY);

	/**
	 * Race `promise` against a hard deadline.
	 *
	 * The returned promise settles exactly like `promise` when it finishes
	 * within `ms` milliseconds; otherwise it rejects with
	 * `Error("Timeout after <ms>ms: <label>")`. The underlying work is NOT
	 * cancelled on timeout — only the wait is abandoned.
	 *
	 * @param {Promise<*>} promise - Work to wait for.
	 * @param {number} ms - Deadline in milliseconds.
	 * @param {string} label - Text appended to the timeout error message.
	 * @returns {Promise<*>} Resolves/rejects with the outcome of `promise`, or
	 *   rejects with the timeout error.
	 */
	const withTimeout = (promise, ms, label) => {
	  let timer;
	  const deadline = new Promise((_, reject) => {
	    timer = setTimeout(
	      () => reject(new Error(`Timeout after ${ms}ms: ${label}`)),
	      ms
	    );
	  });
	  // Always clear the timer once the race is decided; otherwise a pending
	  // timer lingers for the full `ms` after every fast completion and keeps
	  // the event loop alive.
	  return Promise.race([promise, deadline]).finally(() => clearTimeout(timer));
	};

	// Hard ceiling (in milliseconds) for one whole scrape batch.
	const SCRAPE_TIMEOUT_MS = 90000;

	/**
	 * Tell whether `value` is a string that parses as an absolute http(s) URL.
	 * Used to reject non-string body entries and non-web schemes (e.g. `file:`)
	 * before anything is handed to the scraper.
	 * NOTE(review): this does not block private/internal hosts; add an
	 * allow/deny list if the scraper must not reach internal networks.
	 *
	 * @param {*} value - Candidate taken from the request body.
	 * @returns {boolean} True only for parsable `http:` / `https:` URL strings.
	 */
	const isHttpUrl = (value) => {
	  if (typeof value !== 'string') return false;
	  try {
	    const { protocol } = new URL(value);
	    return protocol === 'http:' || protocol === 'https:';
	  } catch {
	    return false;
	  }
	};

	/**
	 * Scrape one URL and shape the result into the article record that is
	 * passed to the summarizer.
	 *
	 * @param {string} url - Page to scrape.
	 * @param {string} fallbackTitle - Title used when the scrape yields none.
	 * @returns {Promise<{title: string, source: string, content: string, url: string}>}
	 * @throws Whatever `scrapeService.scrapeUrl` or `new URL(url)` throws.
	 */
	const scrapeArticle = async (url, fallbackTitle) => {
	  const scraped = await scrapeService.scrapeUrl(url);
	  return {
	    title: scraped.title || fallbackTitle,
	    source: new URL(url).hostname,
	    content: scraped.text || '',
	    url,
	  };
	};

	/**
	 * Log a summarizer failure and, when it is a known condition, send the
	 * matching HTTP response (400 for blocked content, 429 for rate limiting).
	 *
	 * @param {*} err - Error thrown by `geminiService.summarizeMultipleNews`.
	 * @param {object} res - Response object exposing `status()` and `json()`.
	 * @returns {boolean} True when a response was sent; false when the caller
	 *   should rethrow.
	 */
	const respondToGeminiError = (err, res) => {
	  console.error('[GEMINI API] ERROR:', err?.message);
	  console.error('[GEMINI API] Status:', err?.response?.status);
	  console.error('[GEMINI API] Details:', err?.response?.data);

	  // Guard against thrown values that carry no string message.
	  const message = String(err?.message ?? '');

	  if (message.includes('PROHIBITED_CONTENT')) {
	    res.status(400).json({
	      error: 'Prohibited content',
	      api: 'Google Gemini API',
	      details: 'Nội dung không phù hợp. Gemini AI đã chặn nội dung này vì vi phạm tiêu chuẩn an toàn.'
	    });
	    return true;
	  }

	  if (err?.response?.status === 429 || message.includes('429')) {
	    res.status(429).json({
	      error: 'Rate limit exceeded',
	      api: 'Google Gemini API',
	      details: 'Vuot qua gioi han API Gemini. Vui long thu lai sau it phut.'
	    });
	    return true;
	  }

	  return false;
	};

	/**
	 * HTTP handlers for search, search + summarize, and scrape + summarize.
	 *
	 * Every handler takes a request whose `body` holds the parameters and a
	 * response object exposing `status()` and `json()`. No handler reads
	 * `this`, so the methods keep working when passed around unbound.
	 */
	class SearchController {
	  /**
	   * Run a Brave search and return the raw result as JSON.
	   *
	   * Body: `{ query, language?, freshness? }`.
	   * Responds 400 when `query` is missing, 500 when the search fails.
	   */
	  async search(req, res) {
	    try {
	      // A missing body is treated like an empty one so it yields a 400.
	      const { query, language, freshness } = req.body ?? {};

	      if (!query) {
	        return res.status(400).json({ error: 'Query is required' });
	      }

	      const options = {};
	      if (language) options.language = language;
	      if (freshness) options.freshness = freshness;

	      const data = await braveService.search(query, options);
	      res.json(data);
	    } catch (error) {
	      console.error('Search Error:', error.message);
	      res.status(500).json({
	        error: 'Failed to perform search',
	        details: error.response?.data || error.message
	      });
	    }
	  }

	  /**
	   * Search with Brave, scrape the top results, and summarize them with
	   * Gemini.
	   *
	   * Body: `{ query, language?, freshness? }`.
	   * Responds with `{ summary, totalArticles, articles }` on success;
	   * 400 (missing query / blocked content), 404 (no results), 429 (Brave or
	   * Gemini rate limit) or 500 otherwise.
	   */
	  async searchAndSummarize(req, res) {
	    try {
	      const { query, language, freshness } = req.body ?? {};

	      if (!query) {
	        return res.status(400).json({ error: 'Query is required' });
	      }

	      const options = {};
	      if (language) options.language = language;
	      if (freshness) options.freshness = freshness;

	      // 1. Search with Brave
	      console.log('[BRAVE API] Starting search request...');
	      let searchData;
	      try {
	        searchData = await braveService.search(query, options);
	        console.log('[BRAVE API] Search successful, found results');
	      } catch (err) {
	        console.error('[BRAVE API] ERROR:', err.message);
	        console.error('[BRAVE API] Status:', err.response?.status);
	        console.error('[BRAVE API] Details:', err.response?.data);

	        if (err.response?.status === 429) {
	          return res.status(429).json({
	            error: 'Rate limit exceeded',
	            api: 'Brave Search API',
	            details: 'Vuot qua gioi han API Brave Search. Vui long thu lai sau it phut.'
	          });
	        }
	        throw err;
	      }

	      // 2. Pick URLs from the results (news first, web results as fallback)
	      const newsResults = searchData?.news?.results;
	      const webResults = searchData?.web?.results;
	      let urls = [];
	      if (newsResults && newsResults.length > 0) {
	        urls = newsResults.slice(0, MAX_SCRAPE_URLS).map((r) => r.url);
	      } else if (webResults && webResults.length > 0) {
	        urls = webResults
	          .filter((r) => r.type === 'search_result')
	          .slice(0, MAX_SCRAPE_URLS)
	          .map((r) => r.url);
	      }

	      if (urls.length === 0) {
	        return res.status(404).json({ error: 'Khong tim thay ket qua nao' });
	      }

	      // 3. Scrape every URL through the shared concurrency limiter and
	      //    under the same batch deadline as scrapeAndSummarize. A failed
	      //    page becomes null and is dropped, so one bad URL does not fail
	      //    the whole request.
	      console.log(`[PUPPETEER] Scraping ${urls.length} articles...`);
	      const scrapeWork = Promise.all(
	        urls.map((url, index) =>
	          scrapeLimit(async () => {
	            try {
	              const article = await scrapeArticle(url, `Bai ${index + 1}`);
	              console.log(`[PUPPETEER] Successfully scraped: ${url}`);
	              return article;
	            } catch (err) {
	              console.error(`[PUPPETEER] Failed to scrape ${url}:`, err?.message);
	              return null;
	            }
	          })
	        )
	      );

	      const scraped = await withTimeout(scrapeWork, SCRAPE_TIMEOUT_MS, 'searchAndSummarize');
	      const articles = scraped.filter((a) => a !== null);
	      console.log(`[PUPPETEER] Successfully scraped ${articles.length}/${urls.length} articles`);

	      if (articles.length === 0) {
	        return res.status(500).json({ error: 'Khong the scrape duoc bai bao nao' });
	      }

	      // 4. Send everything to Gemini for summarization
	      console.log(`[GEMINI API] Starting summarization of ${articles.length} articles...`);
	      let summary;
	      try {
	        summary = await geminiService.summarizeMultipleNews(articles, query);
	        console.log('[GEMINI API] Summarization successful');
	      } catch (err) {
	        if (respondToGeminiError(err, res)) return;
	        throw err;
	      }

	      console.log('[SUCCESS] Request completed successfully');
	      res.json({
	        summary: summary.summary,
	        totalArticles: summary.totalArticles,
	        // Scraped content is deliberately left out of the response.
	        articles: articles.map(({ title, source, url }) => ({ title, source, url }))
	      });
	    } catch (error) {
	      console.error('[ERROR] Search and Summarize Error:', error.message);
	      console.error('[ERROR] Stack:', error.stack);

	      res.status(500).json({
	        error: 'Failed to search and summarize',
	        details: error.response?.data?.error || error.message
	      });
	    }
	  }

	  /**
	   * Scrape caller-supplied URLs and summarize them with Gemini.
	   *
	   * Body: `{ urls: string[], query? }`. Entries that are not absolute
	   * http(s) URL strings are ignored; at most MAX_SCRAPE_URLS are scraped.
	   * Responds with `{ summary, totalArticles }` on success; 400 (no usable
	   * URLs / blocked content), 429 (Gemini rate limit) or 500 otherwise.
	   */
	  async scrapeAndSummarize(req, res) {
	    const tTotal = Date.now();
	    try {
	      const { urls: rawUrls, query } = req.body ?? {};

	      if (!Array.isArray(rawUrls) || rawUrls.length === 0) {
	        return res.status(400).json({ error: 'URLs array is required' });
	      }

	      // Drop non-strings and non-web schemes up front: they would
	      // otherwise crash the logging below or reach the scraper.
	      const validUrls = rawUrls.filter(isHttpUrl);
	      if (validUrls.length === 0) {
	        return res.status(400).json({ error: 'No valid http(s) URLs provided' });
	      }

	      // Cap to MAX_SCRAPE_URLS to avoid long waits
	      const urls = validUrls.slice(0, MAX_SCRAPE_URLS);
	      const queryPreview = String(query ?? '').substring(0, 40);
	      console.log(`\n${'='.repeat(60)}`);
	      console.log(`[REQUEST] scrapeAndSummarize — ${urls.length} URLs, query="${queryPreview}"`);
	      urls.forEach((u, i) => console.log(` [${i+1}] ${u.substring(0, 80)}`));

	      // 1. Scrape URLs (axios fast path, Puppeteer fallback) under one
	      //    SCRAPE_TIMEOUT_MS deadline for the whole batch. On timeout the
	      //    in-flight scrapes are not cancelled; only the wait is given up.
	      const tScrapeStart = Date.now();
	      console.log(`[SCRAPE] ⏳ Starting scrape of ${urls.length} URLs (concurrency: 3)...`);
	      const scrapeWork = Promise.all(
	        urls.map((url, index) =>
	          scrapeLimit(async () => {
	            const t = Date.now();
	            try {
	              const article = await scrapeArticle(url, `Bài ${index + 1}`);
	              console.log(`[SCRAPE] ✅ [${index+1}/${urls.length}] ${Date.now()-t}ms — ${url.substring(0, 60)}`);
	              return article;
	            } catch (err) {
	              console.error(`[SCRAPE] ❌ [${index+1}/${urls.length}] ${Date.now()-t}ms — ${url.substring(0, 60)}: ${err?.message}`);
	              return null;
	            }
	          })
	        )
	      );

	      const rawArticles = await withTimeout(scrapeWork, SCRAPE_TIMEOUT_MS, 'scrapeAndSummarize');
	      const articles = rawArticles.filter((a) => a !== null);
	      console.log(`[SCRAPE] 🏁 Done: ${articles.length}/${urls.length} OK in ${Date.now()-tScrapeStart}ms (total elapsed: ${Date.now()-tTotal}ms)`);

	      if (articles.length === 0) {
	        return res.status(500).json({ error: 'Khong the scrape duoc bai bao nao' });
	      }

	      // 2. Send to Gemini
	      const tGeminiStart = Date.now();
	      console.log(`[GEMINI] ⏳ Summarizing ${articles.length} articles...`);
	      let summary;
	      try {
	        summary = await geminiService.summarizeMultipleNews(articles, query || '');
	        console.log(`[GEMINI] ✅ Done in ${Date.now()-tGeminiStart}ms (total elapsed: ${Date.now()-tTotal}ms)`);
	      } catch (err) {
	        if (respondToGeminiError(err, res)) return;
	        throw err;
	      }

	      console.log(`[SUCCESS] 🏁 scrapeAndSummarize done in ${Date.now()-tTotal}ms total`);
	      console.log('='.repeat(60));
	      res.json({
	        summary: summary.summary,
	        totalArticles: summary.totalArticles
	      });
	    } catch (error) {
	      console.error('[ERROR] Scrape and Summarize Error:', error.message);
	      console.error('[ERROR] Stack:', error.stack);

	      res.status(500).json({
	        error: 'Failed to scrape and summarize',
	        details: error.message
	      });
	    }
	  }
	}

	// Export one shared SearchController instance as the module's default export.
	export default new SearchController();