| import braveService from '../services/brave.service.js';
|
| import scrapeService from '../services/scrape.service.js';
|
| import geminiService from '../services/gemini.service.js';
|
| import pLimit from 'p-limit';
|
|
|
|
|
/** Number of scrape jobs the shared limiter lets run at the same time. */
const SCRAPE_CONCURRENCY = 3;

/** Cap on how many URLs a single request is allowed to scrape. */
const MAX_SCRAPE_URLS = 5;

/**
 * Module-wide p-limit gate. Because it is created once at module load, every
 * job routed through it shares the same concurrency budget.
 */
const scrapeLimit = pLimit(SCRAPE_CONCURRENCY);
|
|
|
|
|
/**
 * Waits for `promise`, but gives up once `ms` milliseconds have elapsed.
 *
 * The timer is cancelled as soon as the race settles, so a result that
 * arrives early does not leave a timeout scheduled for the remainder of `ms`
 * (a pending timer keeps the Node.js event loop alive).
 *
 * Only the wait is abandoned on timeout; the work behind `promise` is not
 * cancelled and keeps running.
 *
 * @param {Promise<*>|*} promise - Work to wait for.
 * @param {number} ms - Maximum wait in milliseconds.
 * @param {string} label - Text appended to the timeout error message.
 * @returns {Promise<*>} Settles exactly like `promise`, or rejects with
 *   `Error("Timeout after <ms>ms: <label>")` when the timer fires first.
 */
const withTimeout = (promise, ms, label) => {
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error(`Timeout after ${ms}ms: ${label}`)), ms);
  });

  // Release the timer whichever side wins the race.
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
};
|
|
|
/** Upper bound, in milliseconds, on the whole scrape phase of scrapeAndSummarize. */
const SCRAPE_TIMEOUT_MS = 90000;

/**
 * Best-effort message text of a thrown value.
 * Thrown values are not guaranteed to be Error instances, so this never
 * assumes `.message` exists.
 */
const errorText = (err) => String(err?.message ?? err ?? '');

/** Builds the options object for braveService.search from the optional filters. */
const buildSearchOptions = (language, freshness) => {
  const options = {};
  if (language) options.language = language;
  if (freshness) options.freshness = freshness;
  return options;
};

/**
 * Picks the URLs to scrape from a search response: news results when there
 * are any, otherwise web results whose type is 'search_result'. At most
 * MAX_SCRAPE_URLS are returned; a missing or empty response yields [].
 */
const pickUrlsToScrape = (searchData) => {
  const newsResults = searchData?.news?.results;
  if (Array.isArray(newsResults) && newsResults.length > 0) {
    return newsResults.slice(0, MAX_SCRAPE_URLS).map((r) => r.url);
  }

  const webResults = searchData?.web?.results;
  if (Array.isArray(webResults) && webResults.length > 0) {
    return webResults
      .filter((r) => r.type === 'search_result')
      .slice(0, MAX_SCRAPE_URLS)
      .map((r) => r.url);
  }

  return [];
};

/** Shapes one scrape result into the article record passed to the summarizer. */
const toArticle = (scraped, url, fallbackTitle) => ({
  title: scraped.title || fallbackTitle,
  source: new URL(url).hostname, // throws on an unparsable URL; callers catch it
  content: scraped.text || '',
  url,
});

/** Logs the diagnostic fields of a failed summarization call. */
const logGeminiFailure = (err) => {
  console.error('[GEMINI API] ERROR:', err?.message);
  console.error('[GEMINI API] Status:', err?.response?.status);
  console.error('[GEMINI API] Details:', err?.response?.data);
};

/**
 * Maps a summarization failure to a client-facing response.
 * Returns `{ status, body }` for the recognised failures (blocked content,
 * rate limiting) and `null` for everything else.
 */
const describeGeminiFailure = (err) => {
  const message = errorText(err);

  if (message.includes('PROHIBITED_CONTENT')) {
    return {
      status: 400,
      body: {
        error: 'Prohibited content',
        api: 'Google Gemini API',
        details: 'Nội dung không phù hợp. Gemini AI đã chặn nội dung này vì vi phạm tiêu chuẩn an toàn.',
      },
    };
  }

  if (err?.response?.status === 429 || message.includes('429')) {
    return {
      status: 429,
      body: {
        error: 'Rate limit exceeded',
        api: 'Google Gemini API',
        details: 'Vuot qua gioi han API Gemini. Vui long thu lai sau it phut.',
      },
    };
  }

  return null;
};

/**
 * HTTP handlers for searching, scraping and summarizing.
 *
 * Each handler takes `(req, res)` and always answers through `res`.
 * The handlers deliberately never use `this`, so they keep working when
 * handed over unbound. NOTE(review): presumably they are registered as route
 * callbacks; confirm against the router.
 */
class SearchController {
  /**
   * Runs a search and returns the raw service response.
   *
   * Body: `{ query, language?, freshness? }`.
   * Responds 400 when `query` is missing, 500 when the search fails.
   *
   * @param {object} req - Request whose `body` carries the search parameters.
   * @param {object} res - Response exposing `status()` and `json()`.
   */
  async search(req, res) {
    try {
      const { query, language, freshness } = req.body ?? {};

      if (!query) {
        return res.status(400).json({ error: 'Query is required' });
      }

      const data = await braveService.search(query, buildSearchOptions(language, freshness));
      res.json(data);
    } catch (error) {
      console.error('Search Error:', error?.message);
      res.status(500).json({
        error: 'Failed to perform search',
        details: error?.response?.data || error?.message,
      });
    }
  }

  /**
   * Searches, scrapes the top results and returns one combined summary.
   *
   * Body: `{ query, language?, freshness? }`.
   * Responds 400 (missing query / blocked content), 404 (no results),
   * 429 (search or summarizer rate limited) or 500 (anything else).
   *
   * @param {object} req - Request whose `body` carries the search parameters.
   * @param {object} res - Response exposing `status()` and `json()`.
   */
  async searchAndSummarize(req, res) {
    try {
      const { query, language, freshness } = req.body ?? {};

      if (!query) {
        return res.status(400).json({ error: 'Query is required' });
      }

      const options = buildSearchOptions(language, freshness);

      console.log('[BRAVE API] Starting search request...');
      let searchData;
      try {
        searchData = await braveService.search(query, options);
        console.log('[BRAVE API] Search successful, found results');
      } catch (err) {
        console.error('[BRAVE API] ERROR:', err?.message);
        console.error('[BRAVE API] Status:', err?.response?.status);
        console.error('[BRAVE API] Details:', err?.response?.data);

        if (err?.response?.status === 429) {
          return res.status(429).json({
            error: 'Rate limit exceeded',
            api: 'Brave Search API',
            details: 'Vuot qua gioi han API Brave Search. Vui long thu lai sau it phut.',
          });
        }
        throw err;
      }

      const urls = pickUrlsToScrape(searchData);
      if (urls.length === 0) {
        return res.status(404).json({ error: 'Khong tim thay ket qua nao' });
      }

      console.log(`[PUPPETEER] Scraping ${urls.length} articles...`);
      // Go through the shared limiter so this endpoint respects the same
      // concurrency budget as scrapeAndSummarize.
      const scrapeJobs = urls.map((url, index) =>
        scrapeLimit(async () => {
          try {
            const scraped = await scrapeService.scrapeUrl(url);
            console.log(`[PUPPETEER] Successfully scraped: ${url}`);
            return toArticle(scraped, url, `Bai ${index + 1}`);
          } catch (err) {
            // One failing page must not fail the whole request.
            console.error(`[PUPPETEER] Failed to scrape ${url}:`, err?.message);
            return null;
          }
        })
      );

      const articles = (await Promise.all(scrapeJobs)).filter((a) => a !== null);
      console.log(`[PUPPETEER] Successfully scraped ${articles.length}/${urls.length} articles`);

      if (articles.length === 0) {
        return res.status(500).json({ error: 'Khong the scrape duoc bai bao nao' });
      }

      console.log(`[GEMINI API] Starting summarization of ${articles.length} articles...`);
      let summary;
      try {
        summary = await geminiService.summarizeMultipleNews(articles, query);
        console.log('[GEMINI API] Summarization successful');
      } catch (err) {
        logGeminiFailure(err);
        const known = describeGeminiFailure(err);
        if (known) {
          return res.status(known.status).json(known.body);
        }
        throw err;
      }

      console.log('[SUCCESS] Request completed successfully');
      res.json({
        summary: summary.summary,
        totalArticles: summary.totalArticles,
        articles: articles.map(({ title, source, url }) => ({ title, source, url })),
      });
    } catch (error) {
      console.error('[ERROR] Search and Summarize Error:', error?.message);
      console.error('[ERROR] Stack:', error?.stack);

      res.status(500).json({
        error: 'Failed to search and summarize',
        details: error?.response?.data?.error || error?.message,
      });
    }
  }

  /**
   * Scrapes caller-supplied URLs and returns one combined summary.
   *
   * Body: `{ urls: string[], query?: string }`. Entries of `urls` that are
   * not non-empty strings are ignored; at most MAX_SCRAPE_URLS are scraped.
   * Responds 400 (no usable URL / non-string query / blocked content),
   * 429 (summarizer rate limited) or 500 (nothing scraped, timeout, other).
   *
   * @param {object} req - Request whose `body` carries `urls` and `query`.
   * @param {object} res - Response exposing `status()` and `json()`.
   */
  async scrapeAndSummarize(req, res) {
    const tTotal = Date.now();
    try {
      const { urls: rawUrls, query } = req.body ?? {};

      if (!Array.isArray(rawUrls) || rawUrls.length === 0) {
        return res.status(400).json({ error: 'URLs array is required' });
      }

      // The body is untrusted: the string methods used below would throw on
      // non-string values, so reject or drop them up front.
      if (query != null && typeof query !== 'string') {
        return res.status(400).json({ error: 'Query must be a string' });
      }
      const usableUrls = rawUrls.filter((u) => typeof u === 'string' && u.trim() !== '');
      if (usableUrls.length === 0) {
        return res.status(400).json({ error: 'URLs must be non-empty strings' });
      }

      const urls = usableUrls.slice(0, MAX_SCRAPE_URLS);
      console.log(`\n${'='.repeat(60)}`);
      console.log(`[REQUEST] scrapeAndSummarize — ${urls.length} URLs, query="${query?.substring(0,40)}"`);
      urls.forEach((u, i) => console.log(` [${i+1}] ${u.substring(0, 80)}`));

      const tScrapeStart = Date.now();
      console.log(`[SCRAPE] ⏳ Starting scrape of ${urls.length} URLs (concurrency: 3)...`);
      const scrapeWork = Promise.all(
        urls.map((url, index) =>
          scrapeLimit(async () => {
            const t = Date.now();
            try {
              const scraped = await scrapeService.scrapeUrl(url);
              console.log(`[SCRAPE] ✅ [${index+1}/${urls.length}] ${Date.now()-t}ms — ${url.substring(0, 60)}`);
              return toArticle(scraped, url, `Bài ${index + 1}`);
            } catch (err) {
              // One failing page must not fail the whole request.
              console.error(`[SCRAPE] ❌ [${index+1}/${urls.length}] ${Date.now()-t}ms — ${url.substring(0, 60)}: ${err?.message}`);
              return null;
            }
          })
        )
      );

      const rawArticles = await withTimeout(scrapeWork, SCRAPE_TIMEOUT_MS, 'scrapeAndSummarize');
      const articles = rawArticles.filter((a) => a !== null);
      console.log(`[SCRAPE] 🏁 Done: ${articles.length}/${urls.length} OK in ${Date.now()-tScrapeStart}ms (total elapsed: ${Date.now()-tTotal}ms)`);

      if (articles.length === 0) {
        return res.status(500).json({ error: 'Khong the scrape duoc bai bao nao' });
      }

      const tGeminiStart = Date.now();
      console.log(`[GEMINI] ⏳ Summarizing ${articles.length} articles...`);
      let summary;
      try {
        summary = await geminiService.summarizeMultipleNews(articles, query || '');
        console.log(`[GEMINI] ✅ Done in ${Date.now()-tGeminiStart}ms (total elapsed: ${Date.now()-tTotal}ms)`);
      } catch (err) {
        logGeminiFailure(err);
        const known = describeGeminiFailure(err);
        if (known) {
          return res.status(known.status).json(known.body);
        }
        throw err;
      }

      console.log(`[SUCCESS] 🏁 scrapeAndSummarize done in ${Date.now()-tTotal}ms total`);
      console.log('='.repeat(60));
      res.json({
        summary: summary.summary,
        totalArticles: summary.totalArticles,
      });
    } catch (error) {
      console.error('[ERROR] Scrape and Summarize Error:', error?.message);
      console.error('[ERROR] Stack:', error?.stack);

      res.status(500).json({
        error: 'Failed to scrape and summarize',
        details: error?.message,
      });
    }
  }
}

// A single shared instance is exported; the controller holds no state.
export default new SearchController();
|
|
|