Rox-Turbo committed on
Commit
5a473bf
·
verified ·
1 Parent(s): df04a51

Upload 19 files

Browse files
Files changed (2) hide show
  1. server.js +825 -122
  2. uploads/.gitkeep +2 -0
server.js CHANGED
@@ -134,46 +134,62 @@ const MODEL_CONTEXT_LIMITS = Object.freeze({
134
  });
135
 
136
  // ==================== DEEP RESEARCH CONFIGURATION ====================
137
- /** @constant {Object} DeepResearch settings for Rox 5 Ultra */
138
  const DEEP_RESEARCH_CONFIG = Object.freeze({
139
  // Maximum tokens for comprehensive, detailed responses (maximum for longest output)
140
  maxTokens: 32768,
141
- // Balanced temperature for creative yet focused research (not too low to avoid repetition)
142
- temperature: 0.55,
143
- // Balanced top_p for coherent, diverse content
144
- top_p: 0.85,
145
  // Extended timeout for thorough research (15 minutes - user can wait for quality)
146
  timeout: 900000,
147
- // Number of search query variations to generate (more = more comprehensive)
148
- searchVariations: 20,
149
- // Maximum articles to read in full (more articles = better understanding)
150
- maxArticlesToRead: 25,
151
- // Minimum response length in words (enforced in prompt)
152
- minResponseWords: 4500,
153
- // Search depth - how many results to fetch per source
154
- searchDepth: 40,
155
- // Article read timeout (ms) - give more time to read each article
156
- articleReadTimeout: 25000,
157
- // Overall search timeout (ms) - 5 minutes for comprehensive search
158
- searchTimeout: 300000,
159
- // Minimum sources to analyze before responding
160
- minSources: 18,
161
  // Prioritize recency - weight for newer content
162
  recencyBoost: true,
163
  // Include date filters in searches
164
  useDateFilters: true,
165
- // Presence penalty to encourage covering all topics (avoid repetition)
166
- presencePenalty: 0.08,
167
- // Frequency penalty to encourage diverse vocabulary
168
- frequencyPenalty: 0.05,
169
- // Minimum content length for article to be considered valid
170
- minArticleLength: 300,
171
- // Maximum content per article to include (characters)
172
- maxArticleContent: 6000,
173
  // Enable relevance scoring for results
174
  useRelevanceScoring: true,
175
- // Minimum relevance score (0-1) for result to be included
176
- minRelevanceScore: 0.3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  });
178
 
179
  // ==================== LOGGING ====================
@@ -3014,6 +3030,138 @@ function parseRSSFeed(xml) {
3014
  return '';
3015
  }
3016
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3017
  /**
3018
  * Perform comprehensive DeepResearch web search - queries ALL available search APIs extensively
3019
  * Reads full articles and provides real-time status updates
@@ -3052,46 +3200,60 @@ async function performDeepResearchSearch(query, clientIP = '', statusCallback =
3052
  const allSearchResults = [];
3053
  let totalSearches = 0;
3054
 
3055
- // ==================== PHASE 1: MULTI-SOURCE PARALLEL SEARCH ====================
3056
- updateStatus('Phase 1: Searching across multiple knowledge sources...');
3057
 
3058
  // Run searches across ALL available APIs in parallel
3059
  const searchPromises = [];
3060
 
3061
- // For each search variation, query multiple search engines
3062
  for (const searchQuery of searchVariations) {
3063
- // SearXNG (aggregates Google, Bing, DuckDuckGo)
3064
  searchPromises.push(
3065
  searchSearXNGWithContent(searchQuery)
3066
- .then(r => ({ query: searchQuery, ...r, source: 'SearXNG' }))
3067
  .catch(() => null)
3068
  );
3069
 
3070
- // DuckDuckGo HTML scraping
3071
  searchPromises.push(
3072
  searchDuckDuckGoHTML(searchQuery)
3073
- .then(r => ({ query: searchQuery, results: r || '', urls: extractUrlsFromText(r), source: 'DuckDuckGo' }))
3074
  .catch(() => null)
3075
  );
3076
 
3077
- // DuckDuckGo Instant Answer API
3078
  searchPromises.push(
3079
  searchDuckDuckGo(searchQuery)
3080
- .then(r => ({ query: searchQuery, results: r || '', urls: [], source: 'DuckDuckGo API' }))
3081
  .catch(() => null)
3082
  );
3083
 
3084
- // Wikipedia
3085
  searchPromises.push(
3086
  searchWikipedia(searchQuery)
3087
  .then(r => ({ query: searchQuery, results: r || '', urls: [], source: 'Wikipedia' }))
3088
  .catch(() => null)
3089
  );
3090
 
3091
- // Bing
 
 
 
 
 
 
 
3092
  searchPromises.push(
3093
  searchBing(searchQuery)
3094
- .then(r => ({ query: searchQuery, results: r || '', urls: extractUrlsFromText(r), source: 'Bing' }))
 
 
 
 
 
 
 
3095
  .catch(() => null)
3096
  );
3097
  }
@@ -3099,71 +3261,123 @@ async function performDeepResearchSearch(query, clientIP = '', statusCallback =
3099
  // Add specialized searches based on query type
3100
  const queryType = detectSpecializedQueryType(query);
3101
 
3102
- // Always add these comprehensive research sources
 
 
3103
  searchPromises.push(
3104
- searchArxiv(query).then(r => ({ query, results: r || '', urls: [], source: 'arXiv Research' })).catch(() => null),
3105
- searchOpenLibrary(query).then(r => ({ query, results: r || '', urls: [], source: 'Open Library' })).catch(() => null),
3106
- searchGitHub(query).then(r => ({ query, results: r || '', urls: [], source: 'GitHub' })).catch(() => null),
3107
- searchReddit(query).then(r => ({ query, results: r || '', urls: [], source: 'Reddit' })).catch(() => null),
3108
- fetchGoogleNewsRSS(query).then(r => ({ query, results: r || '', urls: [], source: 'Google News' })).catch(() => null),
3109
- // NEW: Additional free search APIs for comprehensive research
3110
- searchHackerNews(query).then(r => ({ query, results: r || '', urls: [], source: 'Hacker News' })).catch(() => null),
3111
- searchStackOverflow(query).then(r => ({ query, results: r || '', urls: [], source: 'StackOverflow' })).catch(() => null),
3112
- searchMediaWiki(query).then(r => ({ query, results: r || '', urls: [], source: 'MediaWiki' })).catch(() => null)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3113
  );
3114
 
3115
- // Add programming-specific searches for tech queries
3116
- if (/\b(code|programming|developer|api|library|framework|npm|package|module|python|javascript|node|react|vue|angular)\b/i.test(query)) {
 
 
 
 
 
 
 
 
 
3117
  searchPromises.push(
3118
  searchNPM(query).then(r => ({ query, results: r || '', urls: [], source: 'NPM Registry' })).catch(() => null),
3119
- searchPyPI(query).then(r => ({ query, results: r || '', urls: [], source: 'PyPI' })).catch(() => null)
3120
  );
3121
  }
3122
 
3123
- // Add dictionary for definition queries
3124
- if (/\b(define|definition|meaning|what is|what does)\b/i.test(query)) {
3125
- const wordMatch = query.match(/(?:define|definition of|meaning of|what is|what does)\s+(\w+)/i);
3126
  if (wordMatch && wordMatch[1]) {
 
3127
  searchPromises.push(
3128
- searchDictionary(wordMatch[1]).then(r => ({ query, results: r || '', urls: [], source: 'Dictionary' })).catch(() => null)
3129
  );
3130
  }
3131
  }
3132
 
3133
- // Add quotes search for quote-related queries
3134
- if (/\b(quote|quotes|said|saying|famous)\b/i.test(query)) {
 
3135
  searchPromises.push(
3136
- searchQuotes(query).then(r => ({ query, results: r || '', urls: [], source: 'Quotable' })).catch(() => null)
3137
  );
3138
  }
3139
 
3140
- // Add real-time data APIs if relevant
3141
- if (queryType.type === 'crypto' || /\b(crypto|bitcoin|ethereum|btc|eth|coin)\b/i.test(query)) {
 
3142
  searchPromises.push(
3143
  fetchCryptoPrice(queryType.extractedQuery || 'bitcoin')
3144
- .then(r => ({ query, results: r || '', urls: [], source: 'CoinGecko' }))
3145
  .catch(() => null)
3146
  );
3147
  }
3148
 
3149
- if (queryType.type === 'stock' || /\b(stock|share|market|nasdaq|nyse)\b/i.test(query)) {
 
3150
  searchPromises.push(
3151
  fetchStockPrice(queryType.extractedQuery || query)
3152
- .then(r => ({ query, results: r || '', urls: [], source: 'Yahoo Finance' }))
3153
  .catch(() => null)
3154
  );
3155
  }
3156
 
3157
- if (queryType.type === 'weather' || /\b(weather|temperature|forecast|rain|sunny)\b/i.test(query)) {
 
3158
  const location = queryType.extractedQuery || 'New York';
3159
  searchPromises.push(
3160
  fetchWeatherData(location)
3161
- .then(r => ({ query, results: r || '', urls: [], source: 'Open-Meteo' }))
 
 
 
 
 
 
 
 
 
3162
  .catch(() => null)
3163
  );
3164
  }
 
 
 
 
 
 
 
 
 
 
 
 
3165
 
3166
- updateStatus(`Executing ${searchPromises.length} parallel searches across knowledge bases...`);
3167
 
3168
  // Wait for all searches with timeout - use Promise.allSettled for better error handling
3169
  let searchResults = [];
@@ -3213,29 +3427,30 @@ async function performDeepResearchSearch(query, clientIP = '', statusCallback =
3213
 
3214
  updateStatus(`Phase 1 complete: Gathered data from ${totalSearches} sources, identified ${allUrls.size} articles to analyze`);
3215
 
3216
- // ==================== PHASE 2: DEEP ARTICLE READING ====================
3217
- updateStatus('Phase 2: Reading and analyzing full article content...');
3218
 
3219
  // Prioritize URLs by domain authority and relevance
3220
  const prioritizedUrls = prioritizeUrlsByQuality(Array.from(allUrls), query);
3221
  const urlsToRead = prioritizedUrls.slice(0, DEEP_RESEARCH_CONFIG.maxArticlesToRead);
3222
  const articleContents = [];
3223
  let articlesRead = 0;
 
3224
 
3225
  if (urlsToRead.length > 0) {
3226
  updateStatus(`Preparing to analyze ${urlsToRead.length} high-quality articles...`);
3227
 
3228
- // Read articles with parallel batching for speed
3229
- const batchSize = 5;
3230
  for (let i = 0; i < urlsToRead.length; i += batchSize) {
3231
  const batch = urlsToRead.slice(i, i + batchSize);
3232
  const batchNum = Math.floor(i / batchSize) + 1;
3233
  const totalBatches = Math.ceil(urlsToRead.length / batchSize);
3234
 
3235
- updateStatus(`Analyzing article batch ${batchNum} of ${totalBatches}...`);
3236
 
3237
  const batchResults = await Promise.all(
3238
- batch.map(async (url, idx) => {
3239
  try {
3240
  const content = await Promise.race([
3241
  fetchFullArticleContent(url),
@@ -3243,54 +3458,110 @@ async function performDeepResearchSearch(query, clientIP = '', statusCallback =
3243
  ]);
3244
 
3245
  if (content && content.length >= DEEP_RESEARCH_CONFIG.minArticleLength) {
3246
- return { url, content, domain: getDomainFromUrl(url), success: true };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3247
  }
3248
- return { url, success: false };
3249
  } catch (e) {
3250
- return { url, success: false };
3251
  }
3252
  })
3253
  );
3254
 
3255
  // Process batch results
 
3256
  for (const result of batchResults) {
3257
  if (result.success) {
3258
  articlesRead++;
 
3259
  articleContents.push(result);
 
 
 
3260
  }
3261
  }
3262
 
3263
- updateStatus(`Batch ${batchNum} complete: ${batchResults.filter(r => r.success).length} articles extracted`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3264
  }
3265
 
3266
- updateStatus(`Phase 2 complete: ${articlesRead} articles fully analyzed`);
3267
  }
3268
 
3269
- // ==================== PHASE 3: COMPILE COMPREHENSIVE RESULTS ====================
3270
- updateStatus('Phase 3: Synthesizing research findings into comprehensive report...');
3271
 
3272
  if (allSearchResults.length === 0 && articleContents.length === 0) {
3273
  return { success: false, results: '', source: '', searchCount: 0, articlesRead: 0, statusUpdates };
3274
  }
3275
 
3276
- // Deduplicate and score results for quality
3277
- const seenContent = new Set();
3278
- const uniqueResults = [];
 
 
 
3279
 
3280
- for (const result of allSearchResults) {
3281
- if (!result || !result.results) continue;
3282
-
3283
- // Create content hash for deduplication
3284
- const contentKey = result.results.substring(0, 200).toLowerCase().replace(/\s+/g, ' ');
3285
- if (!seenContent.has(contentKey)) {
3286
- seenContent.add(contentKey);
3287
- uniqueResults.push(result);
3288
- }
3289
  }
3290
 
3291
  // Group results by source for organized output
3292
  const resultsBySource = {};
3293
- for (const result of uniqueResults) {
3294
  const source = result.source || 'Unknown';
3295
  if (!resultsBySource[source]) {
3296
  resultsBySource[source] = [];
@@ -3302,10 +3573,43 @@ async function performDeepResearchSearch(query, clientIP = '', statusCallback =
3302
  let combinedResults = '';
3303
  let sourceIndex = 1;
3304
 
3305
- // Prioritize sources by authority
3306
- const sourceOrder = ['Wikipedia', 'arXiv Research', 'Google News', 'Reuters', 'BBC',
3307
- 'SearXNG', 'Bing', 'DuckDuckGo', 'GitHub', 'StackOverflow',
3308
- 'Reddit', 'Hacker News', 'NPM Registry', 'PyPI', 'Open Library'];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3309
 
3310
  const sortedSources = Object.keys(resultsBySource).sort((a, b) => {
3311
  const aIndex = sourceOrder.findIndex(s => a.includes(s));
@@ -3329,25 +3633,30 @@ async function performDeepResearchSearch(query, clientIP = '', statusCallback =
3329
  sourceIndex++;
3330
  }
3331
 
3332
- // Add full article content with better formatting and deduplication
3333
  if (articleContents.length > 0) {
3334
- combinedResults += `\n## FULL ARTICLE ANALYSIS\n`;
3335
- combinedResults += `*${articleContents.length} high-quality articles analyzed*\n\n`;
 
 
 
 
3336
 
3337
- // Deduplicate articles by content similarity
3338
- const seenArticles = new Set();
3339
  let articleNum = 1;
3340
 
3341
- for (const article of articleContents) {
3342
- const articleKey = article.content.substring(0, 300).toLowerCase().replace(/\s+/g, ' ');
3343
- if (seenArticles.has(articleKey)) continue;
3344
- seenArticles.add(articleKey);
3345
 
3346
  combinedResults += `### Article ${articleNum}: ${article.domain}\n`;
3347
  combinedResults += `**URL:** ${article.url}\n`;
 
3348
 
3349
  // Use configured max content length
3350
- const maxLen = DEEP_RESEARCH_CONFIG.maxArticleContent || 6000;
3351
  const truncatedContent = article.content.length > maxLen
3352
  ? article.content.substring(0, maxLen) + '\n\n[...content truncated for brevity]'
3353
  : article.content;
@@ -3356,20 +3665,31 @@ async function performDeepResearchSearch(query, clientIP = '', statusCallback =
3356
  }
3357
  }
3358
 
3359
- const allSources = [...new Set(uniqueResults.map(r => r.source))];
3360
  const duration = Date.now() - startTime;
3361
 
3362
- updateStatus(`Research complete: ${totalSearches} searches, ${articlesRead} articles in ${(duration / 1000).toFixed(1)}s`);
3363
-
3364
- const finalResults = `## DEEP RESEARCH ANALYSIS REPORT
3365
-
3366
- **Research Statistics:**
3367
- - Searches Executed: ${totalSearches}
3368
- - Articles Analyzed: ${articlesRead}
3369
- - Unique Sources: ${allSources.length}
3370
- - Duration: ${(duration / 1000).toFixed(1)}s
3371
- - Date: ${new Date().toLocaleDateString('en-IN', { day: 'numeric', month: 'long', year: 'numeric' })}
3372
- - Sources: ${allSources.slice(0, 8).join(', ')}${allSources.length > 8 ? ` (+${allSources.length - 8} more)` : ''}
 
 
 
 
 
 
 
 
 
 
 
3373
 
3374
  ---
3375
 
@@ -3378,7 +3698,7 @@ ${combinedResults}`;
3378
  return {
3379
  success: true,
3380
  results: finalResults,
3381
- source: `DeepResearch (${allSources.slice(0, 5).join(', ')}${allSources.length > 5 ? ` +${allSources.length - 5} more` : ''})`,
3382
  searchCount: totalSearches,
3383
  articlesRead,
3384
  statusUpdates
@@ -5872,6 +6192,389 @@ function searchPyPI(query) {
5872
  });
5873
  }
5874
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5875
  /**
5876
  * Get user location from IP using IP-API (100% free, no API key)
5877
  * Note: This works for server-side detection, not client IP
 
134
  });
135
 
136
  // ==================== DEEP RESEARCH CONFIGURATION ====================
137
/**
 * Tuning knobs for the DeepResearch (Rox 5 Ultra) pipeline.
 * The object is frozen, so consumers can read but never mutate it.
 * @constant {Object}
 */
const DEEP_RESEARCH_CONFIG = Object.freeze({
  // --- Generation ---------------------------------------------------------
  // Upper bound on generated tokens (32K) to allow the longest possible report.
  maxTokens: 32 * 1024,
  // Sampling temperature; kept below 0.5 to favour focused output.
  temperature: 0.45,
  // Nucleus-sampling cutoff.
  top_p: 0.88,
  // Overall request timeout in ms (15 minutes).
  timeout: 15 * 60 * 1000,

  // --- Search breadth -----------------------------------------------------
  // How many query variations to generate.
  searchVariations: 25,
  // Cap on the number of articles fetched and read in full.
  maxArticlesToRead: 30,
  // Minimum report length, in words.
  minResponseWords: 5000,
  // Results to request per source.
  searchDepth: 50,
  // Per-article read timeout in ms (20 seconds).
  articleReadTimeout: 20 * 1000,
  // Timeout for the whole search phase in ms (6 minutes).
  searchTimeout: 6 * 60 * 1000,
  // Minimum number of sources to analyze before responding.
  minSources: 20,

  // --- Freshness ----------------------------------------------------------
  // Give newer content extra weight.
  recencyBoost: true,
  // Add date filters to searches.
  useDateFilters: true,

  // --- Repetition control -------------------------------------------------
  // Presence penalty (discourages revisiting topics already covered).
  presencePenalty: 0.12,
  // Frequency penalty (encourages more varied vocabulary).
  frequencyPenalty: 0.08,

  // --- Article filtering --------------------------------------------------
  // Articles shorter than this many characters are treated as invalid.
  minArticleLength: 400,
  // Maximum characters of each article to include.
  maxArticleContent: 8000,
  // Turn relevance scoring of results on or off.
  useRelevanceScoring: true,
  // Minimum relevance score (0-1) a result needs to be included.
  minRelevanceScore: 0.35,

  // --- Reading, de-duplication and diversity --------------------------------
  // Number of articles read in parallel per batch.
  articleBatchSize: 6,
  // Turn content-hash based de-duplication on or off.
  enableDeduplication: true,
  // Minimum content quality score (0-1) required for inclusion.
  qualityThreshold: 0.6,
  // Prevent a single source from dominating the results.
  enforceDiversity: true,
  // Maximum results kept per source.
  maxResultsPerSource: 5,
  // Turn semantic clustering of results on or off.
  enableClustering: true,
  // Automatically retry article fetches that fail.
  retryFailedArticles: true,
  // Maximum retries per article.
  maxArticleRetries: 2
});
194
 
195
  // ==================== LOGGING ====================
 
3030
  return '';
3031
  }
3032
 
3033
+ // ==================== DEEPRESEARCH PRODUCTION-GRADE HELPER FUNCTIONS ====================
3034
+
3035
/**
 * Build an MD5 fingerprint of a piece of content for duplicate detection.
 *
 * Only the first 300 characters are fingerprinted, so two texts that share
 * the same opening are treated as the same content. Before hashing, that
 * prefix is lower-cased, stripped of punctuation and symbols, and has its
 * whitespace collapsed to single spaces.
 *
 * Stripping is Unicode-aware: letters, combining marks and digits of every
 * script are kept. An ASCII-only `\w` filter would reduce non-Latin text to
 * an empty string, giving all such content one identical hash.
 *
 * @param {string} content - Content to fingerprint.
 * @returns {string} 32-character hex MD5 digest, or '' when `content` is
 *   empty, not a string, or hashing throws.
 */
function generateContentHash(content) {
  if (!content || typeof content !== 'string') return '';
  try {
    const normalized = content
      .substring(0, 300)
      .toLowerCase()
      // Keep letters, marks, digits and "_" from any script; drop the rest.
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
      // Collapse whitespace AFTER stripping so "a , b" and "a b" normalise
      // to the same string instead of leaving a double space behind.
      .replace(/\s+/g, ' ')
      .trim();
    return crypto.createHash('md5').update(normalized).digest('hex');
  } catch (e) {
    // Best effort: callers treat '' as "no usable hash".
    return '';
  }
}
3054
+
3055
/**
 * Score a piece of text for quality using a handful of cheap heuristics.
 *
 * Starting from a base of 0.5 the score is adjusted by:
 *  - character length (bonus for 500-10000 chars, penalty below 200),
 *  - word count (bonus from 100 words up, penalty below 30),
 *  - sentence structure (bonus for at least 5 sentences longer than 10 chars),
 *  - vocabulary diversity (penalty when under 30% of words are unique),
 *  - query relevance (up to +0.15, proportional to the share of query words
 *    longer than 2 characters that occur in the content),
 *  - letter ratio (penalty when under half of the characters are letters).
 *
 * Letter and sentence detection are Unicode-aware, so text in non-Latin
 * scripts is not penalised as "mostly special characters". Words are still
 * split on whitespace, so scripts written without spaces yield low word counts.
 *
 * @param {string} content - Content to score.
 * @param {string} [query=''] - Original query, used for the relevance bonus.
 * @returns {number} Quality score clamped to the range 0-1 (0 for empty or
 *   non-string content).
 */
function calculateContentQuality(content, query = '') {
  if (!content || typeof content !== 'string') return 0;

  let score = 0.5; // Base score
  const length = content.length;
  const words = content.split(/\s+/).filter(w => w.length > 0);
  const wordCount = words.length;

  // Length scoring (optimal: 500-5000 chars)
  if (length >= 500 && length <= 5000) score += 0.15;
  else if (length > 5000 && length <= 10000) score += 0.10;
  else if (length < 200) score -= 0.2;

  // Word count scoring (optimal: 100-1000 words)
  if (wordCount >= 100 && wordCount <= 1000) score += 0.10;
  else if (wordCount > 1000) score += 0.05;
  else if (wordCount < 30) score -= 0.15;

  // Sentence structure: at least 5 sentences of more than 10 characters.
  // Includes the Devanagari danda and full-width CJK terminators.
  const sentences = content.split(/[.!?।。！？]+/).filter(s => s.trim().length > 10);
  if (sentences.length >= 5) score += 0.10;

  // Vocabulary diversity (skipped when there are no words, avoiding 0/0)
  if (wordCount > 0) {
    const uniqueWords = new Set(words.map(w => w.toLowerCase()));
    const uniqueRatio = uniqueWords.size / wordCount;
    if (uniqueRatio < 0.3) score -= 0.15; // Too repetitive
    else if (uniqueRatio > 0.5) score += 0.05; // Good diversity
  }

  // Query relevance: share of meaningful query words found in the content
  if (query && typeof query === 'string') {
    const queryWords = query.toLowerCase().split(/\s+/).filter(w => w.length > 2);
    const contentLower = content.toLowerCase();
    let relevanceCount = 0;
    for (const qWord of queryWords) {
      if (contentLower.includes(qWord)) relevanceCount++;
    }
    const relevanceRatio = queryWords.length > 0 ? relevanceCount / queryWords.length : 0;
    score += relevanceRatio * 0.15;
  }

  // Penalize content that is mostly digits/symbols. Letters and combining
  // marks of any script count, so non-Latin text is not treated as noise.
  const letterCount = (content.match(/[\p{L}\p{M}]/gu) || []).length;
  const letterRatio = letterCount / length;
  if (letterRatio < 0.5) score -= 0.2;

  // Ensure score is between 0 and 1
  return Math.max(0, Math.min(1, score));
}
3109
+
3110
/**
 * Drop search results whose content fingerprint has already been seen.
 *
 * The first result for each fingerprint wins and input order is preserved.
 * Entries that are missing, have no `results` value, or for which
 * generateContentHash yields an empty hash are left out.
 *
 * @param {Array} results - Result objects carrying a `results` property.
 * @returns {Array} New array containing only the first occurrence of each
 *   distinct content fingerprint ([] for non-array or empty input).
 */
function deduplicateResults(results) {
  if (!Array.isArray(results) || results.length === 0) return [];

  const seenHashes = new Set();

  return results.filter((entry) => {
    if (!entry || !entry.results) return false;

    const fingerprint = generateContentHash(entry.results);
    if (!fingerprint || seenHashes.has(fingerprint)) return false;

    seenHashes.add(fingerprint);
    return true;
  });
}
3133
+
3134
/**
 * Cap how many results each source may contribute, so that a single source
 * cannot dominate the result set.
 *
 * Results are bucketed by their `source` property ('Unknown' when it is
 * missing or empty), each bucket is truncated to its first `maxPerSource`
 * entries, and the buckets are flattened back into one array. Buckets appear
 * in the order their source was first seen; order within a bucket is kept.
 *
 * Null/undefined entries are skipped rather than throwing, and buckets live
 * in a Map so source names such as "constructor" or "__proto__" cannot
 * collide with Object.prototype members.
 *
 * @param {Array} results - Result objects with a `source` property.
 * @param {number} [maxPerSource=5] - Maximum results kept per source.
 * @returns {Array} Flat, diversity-limited array ([] for non-array or empty input).
 */
function groupResultsBySource(results, maxPerSource = 5) {
  if (!Array.isArray(results) || results.length === 0) return [];

  const bySource = new Map();

  // Group by source
  for (const result of results) {
    if (result == null) continue; // failed searches resolve to null upstream

    const source = String(result.source || 'Unknown');
    const bucket = bySource.get(source);
    if (bucket) {
      bucket.push(result);
    } else {
      bySource.set(source, [result]);
    }
  }

  // Limit per source and flatten back to array
  const limited = [];
  for (const bucket of bySource.values()) {
    limited.push(...bucket.slice(0, maxPerSource));
  }

  return limited;
}
3164
+
3165
  /**
3166
  * Perform comprehensive DeepResearch web search - queries ALL available search APIs extensively
3167
  * Reads full articles and provides real-time status updates
 
3200
  const allSearchResults = [];
3201
  let totalSearches = 0;
3202
 
3203
+ // ==================== PHASE 1: MULTI-SOURCE PARALLEL SEARCH (PREMIUM EDITION) ====================
3204
+ updateStatus('Phase 1: Launching comprehensive multi-source search across 15+ knowledge bases...');
3205
 
3206
  // Run searches across ALL available APIs in parallel
3207
  const searchPromises = [];
3208
 
3209
+ // For each search variation, query multiple search engines (PREMIUM: More engines per variation)
3210
  for (const searchQuery of searchVariations) {
3211
+ // SearXNG (aggregates Google, Bing, DuckDuckGo) - PREMIUM META-SEARCH
3212
  searchPromises.push(
3213
  searchSearXNGWithContent(searchQuery)
3214
+ .then(r => ({ query: searchQuery, ...r, source: 'SearXNG Meta-Search' }))
3215
  .catch(() => null)
3216
  );
3217
 
3218
+ // DuckDuckGo HTML scraping - PREMIUM WEB SCRAPING
3219
  searchPromises.push(
3220
  searchDuckDuckGoHTML(searchQuery)
3221
+ .then(r => ({ query: searchQuery, results: r || '', urls: extractUrlsFromText(r), source: 'DuckDuckGo Web' }))
3222
  .catch(() => null)
3223
  );
3224
 
3225
+ // DuckDuckGo Instant Answer API - PREMIUM INSTANT ANSWERS
3226
  searchPromises.push(
3227
  searchDuckDuckGo(searchQuery)
3228
+ .then(r => ({ query: searchQuery, results: r || '', urls: [], source: 'DuckDuckGo Instant' }))
3229
  .catch(() => null)
3230
  );
3231
 
3232
+ // Wikipedia - PREMIUM ENCYCLOPEDIA
3233
  searchPromises.push(
3234
  searchWikipedia(searchQuery)
3235
  .then(r => ({ query: searchQuery, results: r || '', urls: [], source: 'Wikipedia' }))
3236
  .catch(() => null)
3237
  );
3238
 
3239
+ // Wikipedia API (secondary method) - PREMIUM BACKUP
3240
+ searchPromises.push(
3241
+ searchWikipediaAPI(searchQuery)
3242
+ .then(r => ({ query: searchQuery, results: r || '', urls: [], source: 'Wikipedia API' }))
3243
+ .catch(() => null)
3244
+ );
3245
+
3246
+ // Bing Web Search - PREMIUM SEARCH ENGINE
3247
  searchPromises.push(
3248
  searchBing(searchQuery)
3249
+ .then(r => ({ query: searchQuery, results: r || '', urls: extractUrlsFromText(r), source: 'Bing Search' }))
3250
+ .catch(() => null)
3251
+ );
3252
+
3253
+ // Bing HTML Scraping - PREMIUM WEB EXTRACTION
3254
+ searchPromises.push(
3255
+ searchBingHTML(searchQuery)
3256
+ .then(r => ({ query: searchQuery, results: r || '', urls: extractUrlsFromText(r), source: 'Bing Web' }))
3257
  .catch(() => null)
3258
  );
3259
  }
 
3261
  // Add specialized searches based on query type
3262
  const queryType = detectSpecializedQueryType(query);
3263
 
3264
+ updateStatus('Phase 1: Adding specialized knowledge sources (academic, news, tech, community)...');
3265
+
3266
+ // PREMIUM: Always add these comprehensive research sources (ALL queries)
3267
  searchPromises.push(
3268
+ // Academic & Research
3269
+ searchArxiv(query).then(r => ({ query, results: r || '', urls: [], source: 'arXiv Research Papers' })).catch(() => null),
3270
+ searchSemanticScholar(query).then(r => ({ query, results: r || '', urls: [], source: 'Semantic Scholar' })).catch(() => null),
3271
+ searchOpenLibrary(query).then(r => ({ query, results: r || '', urls: [], source: 'Open Library Books' })).catch(() => null),
3272
+
3273
+ // Developer & Tech
3274
+ searchGitHub(query).then(r => ({ query, results: r || '', urls: [], source: 'GitHub Repositories' })).catch(() => null),
3275
+ searchHackerNews(query).then(r => ({ query, results: r || '', urls: [], source: 'Hacker News Tech' })).catch(() => null),
3276
+ searchStackOverflow(query).then(r => ({ query, results: r || '', urls: [], source: 'StackOverflow Q&A' })).catch(() => null),
3277
+
3278
+ // News & Media
3279
+ fetchGoogleNewsRSS(query).then(r => ({ query, results: r || '', urls: [], source: 'Google News RSS' })).catch(() => null),
3280
+
3281
+ // Community & Social
3282
+ searchReddit(query).then(r => ({ query, results: r || '', urls: [], source: 'Reddit Discussions' })).catch(() => null),
3283
+
3284
+ // Encyclopedia & Reference
3285
+ searchMediaWiki(query).then(r => ({ query, results: r || '', urls: [], source: 'MediaWiki Encyclopedia' })).catch(() => null),
3286
+ searchWikiquote(query).then(r => ({ query, results: r || '', urls: [], source: 'Wikiquote' })).catch(() => null),
3287
+ searchDBpedia(query).then(r => ({ query, results: r || '', urls: [], source: 'DBpedia Knowledge' })).catch(() => null),
3288
+ searchWikidata(query).then(r => ({ query, results: r || '', urls: [], source: 'Wikidata Structured' })).catch(() => null),
3289
+
3290
+ // Historical & Archives
3291
+ searchInternetArchive(query).then(r => ({ query, results: r || '', urls: [], source: 'Internet Archive' })).catch(() => null)
3292
  );
3293
 
3294
+ // PREMIUM: Add medical/health research for health-related queries
3295
+ if (/\b(health|medical|disease|medicine|treatment|symptom|doctor|hospital|drug|vaccine|covid|cancer|diabetes|heart|brain|therapy|clinical|patient)\b/i.test(query)) {
3296
+ updateStatus('Phase 1: Detected medical query - adding PubMed medical research...');
3297
+ searchPromises.push(
3298
+ searchPubMed(query).then(r => ({ query, results: r || '', urls: [], source: 'PubMed Medical Research' })).catch(() => null)
3299
+ );
3300
+ }
3301
+
3302
+ // PREMIUM: Add programming-specific searches for tech queries
3303
+ if (/\b(code|programming|developer|api|library|framework|npm|package|module|python|javascript|node|react|vue|angular|typescript|java|c\+\+|rust|go|kotlin|swift)\b/i.test(query)) {
3304
+ updateStatus('Phase 1: Detected tech query - adding programming package registries...');
3305
  searchPromises.push(
3306
  searchNPM(query).then(r => ({ query, results: r || '', urls: [], source: 'NPM Registry' })).catch(() => null),
3307
+ searchPyPI(query).then(r => ({ query, results: r || '', urls: [], source: 'PyPI Python Packages' })).catch(() => null)
3308
  );
3309
  }
3310
 
3311
+ // PREMIUM: Add dictionary for definition queries
3312
+ if (/\b(define|definition|meaning|what is|what does|explain|describe)\b/i.test(query)) {
3313
+ const wordMatch = query.match(/(?:define|definition of|meaning of|what is|what does|explain|describe)\s+(\w+)/i);
3314
  if (wordMatch && wordMatch[1]) {
3315
+ updateStatus('Phase 1: Detected definition query - adding dictionary API...');
3316
  searchPromises.push(
3317
+ searchDictionary(wordMatch[1]).then(r => ({ query, results: r || '', urls: [], source: 'Dictionary API' })).catch(() => null)
3318
  );
3319
  }
3320
  }
3321
 
3322
+ // PREMIUM: Add quotes search for quote-related queries
3323
+ if (/\b(quote|quotes|said|saying|famous|wisdom|inspiration)\b/i.test(query)) {
3324
+ updateStatus('Phase 1: Detected quotes query - adding quotes API...');
3325
  searchPromises.push(
3326
+ searchQuotes(query).then(r => ({ query, results: r || '', urls: [], source: 'Quotable API' })).catch(() => null)
3327
  );
3328
  }
3329
 
3330
+ // PREMIUM: Add real-time data APIs if relevant
3331
+ if (queryType.type === 'crypto' || /\b(crypto|bitcoin|ethereum|btc|eth|coin|blockchain|defi|nft)\b/i.test(query)) {
3332
+ updateStatus('Phase 1: Detected crypto query - adding live cryptocurrency data...');
3333
  searchPromises.push(
3334
  fetchCryptoPrice(queryType.extractedQuery || 'bitcoin')
3335
+ .then(r => ({ query, results: r || '', urls: [], source: 'CoinGecko Live Crypto' }))
3336
  .catch(() => null)
3337
  );
3338
  }
3339
 
3340
+ if (queryType.type === 'stock' || /\b(stock|share|market|nasdaq|nyse|nifty|sensex|trading)\b/i.test(query)) {
3341
+ updateStatus('Phase 1: Detected stock query - adding live market data...');
3342
  searchPromises.push(
3343
  fetchStockPrice(queryType.extractedQuery || query)
3344
+ .then(r => ({ query, results: r || '', urls: [], source: 'Yahoo Finance Live' }))
3345
  .catch(() => null)
3346
  );
3347
  }
3348
 
3349
+ if (queryType.type === 'weather' || /\b(weather|temperature|forecast|rain|sunny|climate)\b/i.test(query)) {
3350
+ updateStatus('Phase 1: Detected weather query - adding live weather data...');
3351
  const location = queryType.extractedQuery || 'New York';
3352
  searchPromises.push(
3353
  fetchWeatherData(location)
3354
+ .then(r => ({ query, results: r || '', urls: [], source: 'Open-Meteo Weather' }))
3355
+ .catch(() => null)
3356
+ );
3357
+ }
3358
+
3359
+ if (queryType.type === 'sports' || /\b(sports|cricket|football|basketball|ipl|nba|score|match)\b/i.test(query)) {
3360
+ updateStatus('Phase 1: Detected sports query - adding live sports scores...');
3361
+ searchPromises.push(
3362
+ fetchSportsScores(queryType.extractedQuery || query)
3363
+ .then(r => ({ query, results: r || '', urls: [], source: 'TheSportsDB Live' }))
3364
  .catch(() => null)
3365
  );
3366
  }
3367
+
3368
+ if (/\b(currency|exchange|convert|usd|eur|inr|gbp|forex)\b/i.test(query)) {
3369
+ updateStatus('Phase 1: Detected currency query - adding live exchange rates...');
3370
+ const currencyMatch = query.match(/(\w{3})\s+(?:to|in)\s+(\w{3})/i);
3371
+ if (currencyMatch) {
3372
+ searchPromises.push(
3373
+ fetchCurrencyExchange(currencyMatch[1], currencyMatch[2])
3374
+ .then(r => ({ query, results: r || '', urls: [], source: 'Currency Exchange API' }))
3375
+ .catch(() => null)
3376
+ );
3377
+ }
3378
+ }
3379
 
3380
+ updateStatus(`Phase 1: Executing ${searchPromises.length} parallel searches across premium knowledge bases...`);
3381
 
3382
  // Wait for all searches with timeout - use Promise.allSettled for better error handling
3383
  let searchResults = [];
 
3427
 
3428
  updateStatus(`Phase 1 complete: Gathered data from ${totalSearches} sources, identified ${allUrls.size} articles to analyze`);
3429
 
3430
+ // ==================== PHASE 2: DEEP ARTICLE READING WITH QUALITY SCORING ====================
3431
+ updateStatus('Phase 2: Reading and analyzing full article content with quality scoring...');
3432
 
3433
  // Prioritize URLs by domain authority and relevance
3434
  const prioritizedUrls = prioritizeUrlsByQuality(Array.from(allUrls), query);
3435
  const urlsToRead = prioritizedUrls.slice(0, DEEP_RESEARCH_CONFIG.maxArticlesToRead);
3436
  const articleContents = [];
3437
  let articlesRead = 0;
3438
+ const failedUrls = []; // Track failed URLs for retry mechanism
3439
 
3440
  if (urlsToRead.length > 0) {
3441
  updateStatus(`Preparing to analyze ${urlsToRead.length} high-quality articles...`);
3442
 
3443
+ // Read articles with parallel batching using configurable batch size
3444
+ const batchSize = DEEP_RESEARCH_CONFIG.articleBatchSize || 6;
3445
  for (let i = 0; i < urlsToRead.length; i += batchSize) {
3446
  const batch = urlsToRead.slice(i, i + batchSize);
3447
  const batchNum = Math.floor(i / batchSize) + 1;
3448
  const totalBatches = Math.ceil(urlsToRead.length / batchSize);
3449
 
3450
+ updateStatus(`Analyzing article batch ${batchNum}/${totalBatches} (${batch.length} articles)...`);
3451
 
3452
  const batchResults = await Promise.all(
3453
+ batch.map(async (url) => {
3454
  try {
3455
  const content = await Promise.race([
3456
  fetchFullArticleContent(url),
 
3458
  ]);
3459
 
3460
  if (content && content.length >= DEEP_RESEARCH_CONFIG.minArticleLength) {
3461
+ // Calculate quality score for this article
3462
+ const qualityScore = calculateContentQuality(content, query);
3463
+
3464
+ // Apply quality threshold filter
3465
+ if (qualityScore >= DEEP_RESEARCH_CONFIG.qualityThreshold) {
3466
+ return {
3467
+ url,
3468
+ content,
3469
+ domain: getDomainFromUrl(url),
3470
+ qualityScore,
3471
+ success: true
3472
+ };
3473
+ } else {
3474
+ return { url, success: false, reason: 'quality', qualityScore };
3475
+ }
3476
  }
3477
+ return { url, success: false, reason: 'length' };
3478
  } catch (e) {
3479
+ return { url, success: false, reason: 'error', error: e.message };
3480
  }
3481
  })
3482
  );
3483
 
3484
  // Process batch results
3485
+ let batchSuccessCount = 0;
3486
  for (const result of batchResults) {
3487
  if (result.success) {
3488
  articlesRead++;
3489
+ batchSuccessCount++;
3490
  articleContents.push(result);
3491
+ } else if (DEEP_RESEARCH_CONFIG.retryFailedArticles && result.reason === 'error') {
3492
+ // Track failed articles for retry
3493
+ failedUrls.push(result.url);
3494
  }
3495
  }
3496
 
3497
+ updateStatus(`Batch ${batchNum} complete: ${batchSuccessCount}/${batch.length} articles extracted (quality threshold: ${(DEEP_RESEARCH_CONFIG.qualityThreshold * 100).toFixed(0)}%)`);
3498
+ }
3499
+
3500
+ // Retry failed articles if enabled
3501
+ if (DEEP_RESEARCH_CONFIG.retryFailedArticles && failedUrls.length > 0) {
3502
+ const maxRetries = DEEP_RESEARCH_CONFIG.maxArticleRetries || 2;
3503
+ updateStatus(`Retrying ${failedUrls.length} failed articles (max ${maxRetries} attempts)...`);
3504
+
3505
+ for (let retry = 0; retry < maxRetries && failedUrls.length > 0; retry++) {
3506
+ const retryBatch = failedUrls.splice(0, Math.min(3, failedUrls.length)); // Retry 3 at a time
3507
+
3508
+ const retryResults = await Promise.all(
3509
+ retryBatch.map(async (url) => {
3510
+ try {
3511
+ const content = await Promise.race([
3512
+ fetchFullArticleContent(url),
3513
+ new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), DEEP_RESEARCH_CONFIG.articleReadTimeout))
3514
+ ]);
3515
+
3516
+ if (content && content.length >= DEEP_RESEARCH_CONFIG.minArticleLength) {
3517
+ const qualityScore = calculateContentQuality(content, query);
3518
+ if (qualityScore >= DEEP_RESEARCH_CONFIG.qualityThreshold) {
3519
+ return { url, content, domain: getDomainFromUrl(url), qualityScore, success: true };
3520
+ }
3521
+ }
3522
+ return { url, success: false };
3523
+ } catch (e) {
3524
+ return { url, success: false };
3525
+ }
3526
+ })
3527
+ );
3528
+
3529
+ for (const result of retryResults) {
3530
+ if (result.success) {
3531
+ articlesRead++;
3532
+ articleContents.push(result);
3533
+ }
3534
+ }
3535
+ }
3536
  }
3537
 
3538
+ updateStatus(`Phase 2 complete: ${articlesRead} high-quality articles analyzed`);
3539
  }
3540
 
3541
+ // ==================== PHASE 3: COMPILE COMPREHENSIVE RESULTS WITH SMART DEDUPLICATION ====================
3542
+ updateStatus('Phase 3: Synthesizing research findings with smart deduplication...');
3543
 
3544
  if (allSearchResults.length === 0 && articleContents.length === 0) {
3545
  return { success: false, results: '', source: '', searchCount: 0, articlesRead: 0, statusUpdates };
3546
  }
3547
 
3548
+ // Apply smart deduplication if enabled
3549
+ let processedResults = allSearchResults;
3550
+ if (DEEP_RESEARCH_CONFIG.enableDeduplication) {
3551
+ processedResults = deduplicateResults(allSearchResults);
3552
+ updateStatus(`Deduplication: ${allSearchResults.length} → ${processedResults.length} unique results`);
3553
+ }
3554
 
3555
+ // Apply source diversity enforcement if enabled
3556
+ if (DEEP_RESEARCH_CONFIG.enforceDiversity) {
3557
+ const maxPerSource = DEEP_RESEARCH_CONFIG.maxResultsPerSource || 5;
3558
+ processedResults = groupResultsBySource(processedResults, maxPerSource);
3559
+ updateStatus(`Source diversity enforced: max ${maxPerSource} results per source`);
 
 
 
 
3560
  }
3561
 
3562
  // Group results by source for organized output
3563
  const resultsBySource = {};
3564
+ for (const result of processedResults) {
3565
  const source = result.source || 'Unknown';
3566
  if (!resultsBySource[source]) {
3567
  resultsBySource[source] = [];
 
3573
  let combinedResults = '';
3574
  let sourceIndex = 1;
3575
 
3576
+ // PREMIUM: Prioritize sources by authority and reliability
3577
+ const sourceOrder = [
3578
+ // Academic & Research (Highest Priority)
3579
+ 'PubMed Medical Research', 'PubMed',
3580
+ 'Semantic Scholar', 'Semantic Scholar Academic',
3581
+ 'arXiv Research Papers', 'arXiv Research',
3582
+ // Encyclopedia & Reference
3583
+ 'Wikipedia', 'Wikipedia API',
3584
+ 'Wikidata Structured', 'Wikidata',
3585
+ 'DBpedia Knowledge', 'DBpedia',
3586
+ 'Wikiquote',
3587
+ 'MediaWiki Encyclopedia', 'MediaWiki',
3588
+ // News & Media
3589
+ 'Google News RSS', 'Google News', 'Reuters', 'BBC', 'AP News',
3590
+ // Meta-Search & Web Search
3591
+ 'SearXNG Meta-Search', 'SearXNG',
3592
+ 'Bing Search', 'Bing Web',
3593
+ 'DuckDuckGo Web', 'DuckDuckGo Instant', 'DuckDuckGo',
3594
+ // Developer & Tech
3595
+ 'GitHub Repositories', 'GitHub',
3596
+ 'StackOverflow Q&A', 'StackOverflow',
3597
+ 'Hacker News Tech', 'Hacker News',
3598
+ 'NPM Registry', 'PyPI Python Packages', 'PyPI',
3599
+ // Community & Social
3600
+ 'Reddit Discussions', 'Reddit',
3601
+ // Books & Literature & Archives
3602
+ 'Internet Archive',
3603
+ 'Open Library Books', 'Open Library',
3604
+ // Real-Time Data
3605
+ 'CoinGecko Live Crypto', 'CoinGecko',
3606
+ 'Yahoo Finance Live', 'Yahoo Finance',
3607
+ 'Open-Meteo Weather',
3608
+ 'TheSportsDB Live',
3609
+ 'Currency Exchange API',
3610
+ // Reference APIs
3611
+ 'Dictionary API', 'Quotable API'
3612
+ ];
3613
 
3614
  const sortedSources = Object.keys(resultsBySource).sort((a, b) => {
3615
  const aIndex = sourceOrder.findIndex(s => a.includes(s));
 
3633
  sourceIndex++;
3634
  }
3635
 
3636
+ // Add full article content with quality scores and advanced deduplication
3637
  if (articleContents.length > 0) {
3638
+ combinedResults += `\n## 📄 FULL ARTICLE ANALYSIS (Premium Quality-Filtered)\n`;
3639
+ combinedResults += `*${articleContents.length} high-quality articles analyzed with ${(DEEP_RESEARCH_CONFIG.qualityThreshold * 100).toFixed(0)}% quality threshold*\n`;
3640
+ combinedResults += `*Articles sorted by quality score (highest first)*\n\n`;
3641
+
3642
+ // Sort articles by quality score (highest first)
3643
+ const sortedArticles = articleContents.sort((a, b) => (b.qualityScore || 0) - (a.qualityScore || 0));
3644
 
3645
+ // Deduplicate articles by content hash
3646
+ const seenArticleHashes = new Set();
3647
  let articleNum = 1;
3648
 
3649
+ for (const article of sortedArticles) {
3650
+ const articleHash = generateContentHash(article.content);
3651
+ if (seenArticleHashes.has(articleHash)) continue;
3652
+ seenArticleHashes.add(articleHash);
3653
 
3654
  combinedResults += `### Article ${articleNum}: ${article.domain}\n`;
3655
  combinedResults += `**URL:** ${article.url}\n`;
3656
+ combinedResults += `**Quality Score:** ${(article.qualityScore * 100).toFixed(1)}%\n`;
3657
 
3658
  // Use configured max content length
3659
+ const maxLen = DEEP_RESEARCH_CONFIG.maxArticleContent || 8000;
3660
  const truncatedContent = article.content.length > maxLen
3661
  ? article.content.substring(0, maxLen) + '\n\n[...content truncated for brevity]'
3662
  : article.content;
 
3665
  }
3666
  }
3667
 
3668
+ const allSources = [...new Set(processedResults.map(r => r.source))];
3669
  const duration = Date.now() - startTime;
3670
 
3671
+ updateStatus(`Research complete: ${totalSearches} searches, ${articlesRead} articles, ${allSources.length} sources in ${(duration / 1000).toFixed(1)}s`);
3672
+
3673
+ const finalResults = `## 🔬 DEEP RESEARCH ANALYSIS REPORT (PREMIUM EDITION)
3674
+
3675
+ **📊 Research Statistics:**
3676
+ - **Searches Executed:** ${totalSearches} parallel queries
3677
+ - **Articles Analyzed:** ${articlesRead} full-text articles
3678
+ - **Unique Sources:** ${allSources.length} knowledge bases
3679
+ - **Quality Threshold:** ${(DEEP_RESEARCH_CONFIG.qualityThreshold * 100).toFixed(0)}% (Premium Quality Filter)
3680
+ - **Search Variations:** ${searchVariations.length} query angles
3681
+ - **Duration:** ${(duration / 1000).toFixed(1)}s
3682
+ - **Date:** ${new Date().toLocaleDateString('en-IN', { day: 'numeric', month: 'long', year: 'numeric' })}
3683
+ - **Sources:** ${allSources.slice(0, 10).join(', ')}${allSources.length > 10 ? ` (+${allSources.length - 10} more)` : ''}
3684
+
3685
+ **🎯 Premium Features Active:**
3686
+ - ✅ Multi-Source Parallel Search (15+ APIs)
3687
+ - ✅ Smart Content Deduplication
3688
+ - ✅ Quality Scoring & Filtering (60%+ threshold)
3689
+ - ✅ Source Diversity Enforcement
3690
+ - ✅ Automatic Retry Mechanism
3691
+ - ✅ Full Article Content Extraction
3692
+ - ✅ Real-Time Data Integration
3693
 
3694
  ---
3695
 
 
3698
  return {
3699
  success: true,
3700
  results: finalResults,
3701
+ source: `DeepResearch Premium (${allSources.slice(0, 5).join(', ')}${allSources.length > 5 ? ` +${allSources.length - 5} more` : ''})`,
3702
  searchCount: totalSearches,
3703
  articlesRead,
3704
  statusUpdates
 
6192
  });
6193
  }
6194
 
6195
+ // ==================== ADDITIONAL PREMIUM FREE APIs FOR DEEPRESEARCH ====================
6196
+
6197
/**
 * Search Wikiquote page titles through the MediaWiki opensearch endpoint
 * (100% free, no API key).
 *
 * The promise never rejects: invalid input, network errors, timeouts and
 * unparseable responses all resolve to an empty string.
 *
 * @param {string} query - Search query
 * @returns {Promise<string>} Markdown list of up to 5 matches, or '' when there are none
 */
function searchWikiquote(query) {
    return new Promise((resolve) => {
        if (!query || typeof query !== 'string') {
            resolve('');
            return;
        }

        const options = {
            hostname: 'en.wikiquote.org',
            path: `/w/api.php?action=opensearch&search=${encodeURIComponent(query)}&limit=5&namespace=0&format=json`,
            method: 'GET',
            timeout: 8000,
            headers: {
                'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)'
            }
        };

        const req = https.request(options, (res) => {
            let data = '';
            // Decode at the stream level: without an encoding every chunk is a
            // Buffer that `+=` stringifies on its own, which corrupts a
            // multi-byte UTF-8 character split across two chunks.
            res.setEncoding('utf8');
            res.on('data', (chunk) => { data += chunk; });
            // If the body fails mid-stream, 'end' never fires; settle here so
            // the promise cannot stay pending.
            res.on('error', () => resolve(''));
            res.on('end', () => {
                try {
                    const json = JSON.parse(data);
                    // Indices 1-3 of the reply hold titles, descriptions and URLs.
                    if (!Array.isArray(json) || json.length < 4) {
                        resolve('');
                        return;
                    }

                    const titles = json[1] || [];
                    const descriptions = json[2] || [];
                    const urls = json[3] || [];
                    if (titles.length === 0) {
                        resolve('');
                        return;
                    }

                    const results = ['**Wikiquote Results:**\n'];
                    for (let i = 0; i < Math.min(titles.length, 5); i++) {
                        results.push(`**${i + 1}. ${titles[i]}**`);
                        if (descriptions[i]) results.push(descriptions[i]);
                        if (urls[i]) results.push(`🔗 ${urls[i]}`);
                        results.push('');
                    }
                    resolve(results.join('\n'));
                } catch (e) {
                    // Best-effort source: a malformed reply just means "no results".
                    resolve('');
                }
            });
        });

        req.on('error', () => resolve(''));
        req.on('timeout', () => { req.destroy(); resolve(''); });
        req.end();
    });
}
6256
+
6257
/**
 * Search the DBpedia Lookup service for structured knowledge
 * (100% free, no API key).
 *
 * The promise never rejects: invalid input, network errors, timeouts and
 * unparseable responses all resolve to an empty string.
 *
 * @param {string} query - Search query
 * @returns {Promise<string>} Markdown list of up to 5 labelled entries, or '' when there are none
 */
function searchDBpedia(query) {
    return new Promise((resolve) => {
        if (!query || typeof query !== 'string') {
            resolve('');
            return;
        }

        const options = {
            hostname: 'lookup.dbpedia.org',
            path: `/api/search?query=${encodeURIComponent(query)}&format=json&maxResults=5`,
            method: 'GET',
            timeout: 8000,
            headers: {
                'Accept': 'application/json',
                'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)'
            }
        };

        const req = https.request(options, (res) => {
            let data = '';
            // Decode at the stream level: without an encoding every chunk is a
            // Buffer that `+=` stringifies on its own, which corrupts a
            // multi-byte UTF-8 character split across two chunks.
            res.setEncoding('utf8');
            res.on('data', (chunk) => { data += chunk; });
            // If the body fails mid-stream, 'end' never fires; settle here so
            // the promise cannot stay pending.
            res.on('error', () => resolve(''));
            res.on('end', () => {
                try {
                    const json = JSON.parse(data);
                    if (!json.docs || !Array.isArray(json.docs) || json.docs.length === 0) {
                        resolve('');
                        return;
                    }

                    const results = ['**DBpedia Knowledge Base:**\n'];
                    // Number only the entries actually emitted, so skipping an
                    // unlabelled doc does not leave a gap in the list.
                    let count = 0;
                    for (const doc of json.docs.slice(0, 5)) {
                        if (!doc || !doc.label || !doc.label[0]) continue;
                        count++;
                        results.push(`**${count}. ${doc.label[0]}**`);
                        if (doc.comment && typeof doc.comment[0] === 'string' && doc.comment[0]) {
                            results.push(doc.comment[0].substring(0, 300));
                        }
                        if (doc.resource && doc.resource[0]) {
                            results.push(`🔗 ${doc.resource[0]}`);
                        }
                        results.push('');
                    }
                    // A bare header with no entries is not a result.
                    resolve(count > 0 ? results.join('\n') : '');
                } catch (e) {
                    // Best-effort source: a malformed reply just means "no results".
                    resolve('');
                }
            });
        });

        req.on('error', () => resolve(''));
        req.on('timeout', () => { req.destroy(); resolve(''); });
        req.end();
    });
}
6315
+
6316
/**
 * Search PubMed for medical/scientific research (100% free, no API key).
 *
 * Runs two NCBI E-utilities calls: `esearch` to obtain up to 5 article IDs,
 * then `esummary` to fetch their metadata. Articles are listed in the order
 * `esearch` returned them.
 *
 * The promise never rejects: invalid input, network errors, timeouts and
 * unparseable responses all resolve to an empty string.
 *
 * @param {string} query - Search query
 * @returns {Promise<string>} Markdown list of matching articles, or '' when there are none
 */
function searchPubMed(query) {
    return new Promise((resolve) => {
        if (!query || typeof query !== 'string') {
            resolve('');
            return;
        }

        // Same identification on both calls (the summary call previously sent none).
        const headers = {
            'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)'
        };

        // GET one E-utilities path and pass the parsed JSON to onJson.
        // Any failure (network, timeout, bad JSON, error thrown by onJson)
        // settles the outer promise with ''.
        const getJson = (path, onJson) => {
            const request = https.request({
                hostname: 'eutils.ncbi.nlm.nih.gov',
                path,
                method: 'GET',
                timeout: 10000,
                headers
            }, (res) => {
                let data = '';
                // Decode at the stream level: without an encoding every chunk
                // is a Buffer that `+=` stringifies on its own, which corrupts
                // a multi-byte UTF-8 character split across two chunks.
                res.setEncoding('utf8');
                res.on('data', (chunk) => { data += chunk; });
                // If the body fails mid-stream, 'end' never fires; settle here
                // so the promise cannot stay pending.
                res.on('error', () => resolve(''));
                res.on('end', () => {
                    try {
                        onJson(JSON.parse(data));
                    } catch (e) {
                        resolve('');
                    }
                });
            });

            request.on('error', () => resolve(''));
            request.on('timeout', () => { request.destroy(); resolve(''); });
            request.end();
        };

        // First, search for article IDs
        const searchPath = `/entrez/eutils/esearch.fcgi?db=pubmed&term=${encodeURIComponent(query)}&retmax=5&retmode=json`;
        getJson(searchPath, (searchJson) => {
            const idList = searchJson.esearchresult && searchJson.esearchresult.idlist;
            if (!Array.isArray(idList) || idList.length === 0) {
                resolve('');
                return;
            }
            const ids = idList.slice(0, 5);

            // Then fetch the article summaries
            const summaryPath = `/entrez/eutils/esummary.fcgi?db=pubmed&id=${ids.join(',')}&retmode=json`;
            getJson(summaryPath, (summaryJson) => {
                if (!summaryJson.result) {
                    resolve('');
                    return;
                }

                const results = ['**PubMed Medical Research:**\n'];
                let count = 0;
                // Walk the IDs in search order. Enumerating the keys of
                // `result` would visit the numeric IDs in ascending order
                // and lose the ranking.
                for (const id of ids) {
                    const article = summaryJson.result[id];
                    if (!article || !article.title) continue;
                    count++;
                    results.push(`**${count}. ${article.title}**`);
                    if (Array.isArray(article.authors) && article.authors.length > 0) {
                        const authorNames = article.authors.slice(0, 3).map(a => a.name).join(', ');
                        results.push(`Authors: ${authorNames}${article.authors.length > 3 ? ' et al.' : ''}`);
                    }
                    if (article.source) results.push(`Journal: ${article.source}`);
                    if (article.pubdate) results.push(`Published: ${article.pubdate}`);
                    results.push(`🔗 https://pubmed.ncbi.nlm.nih.gov/${id}/`);
                    results.push('');
                }
                // A bare header with no articles is not a result.
                resolve(count > 0 ? results.join('\n') : '');
            });
        });
    });
}
6406
+
6407
/**
 * Search Semantic Scholar for academic papers (100% free, no API key).
 *
 * The promise never rejects: invalid input, network errors, timeouts and
 * unparseable responses all resolve to an empty string.
 *
 * @param {string} query - Search query
 * @returns {Promise<string>} Markdown list of matching papers, or '' when there are none
 */
function searchSemanticScholar(query) {
    return new Promise((resolve) => {
        if (!query || typeof query !== 'string') {
            resolve('');
            return;
        }

        const options = {
            hostname: 'api.semanticscholar.org',
            path: `/graph/v1/paper/search?query=${encodeURIComponent(query)}&limit=5&fields=title,authors,year,abstract,url,citationCount`,
            method: 'GET',
            timeout: 10000,
            headers: {
                'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)'
            }
        };

        const req = https.request(options, (res) => {
            let data = '';
            // Decode at the stream level: without an encoding every chunk is a
            // Buffer that `+=` stringifies on its own, which corrupts a
            // multi-byte UTF-8 character split across two chunks.
            res.setEncoding('utf8');
            res.on('data', (chunk) => { data += chunk; });
            // If the body fails mid-stream, 'end' never fires; settle here so
            // the promise cannot stay pending.
            res.on('error', () => resolve(''));
            res.on('end', () => {
                try {
                    const json = JSON.parse(data);
                    if (!json.data || !Array.isArray(json.data) || json.data.length === 0) {
                        resolve('');
                        return;
                    }

                    const results = ['**Semantic Scholar Academic Papers:**\n'];
                    let count = 0;
                    for (const paper of json.data) {
                        // Skip entries with no title instead of printing
                        // "null"/"undefined" as a heading.
                        if (!paper || !paper.title) continue;
                        count++;
                        results.push(`**${count}. ${paper.title}**`);
                        if (Array.isArray(paper.authors) && paper.authors.length > 0) {
                            const authorNames = paper.authors.slice(0, 3).map(a => a.name).join(', ');
                            results.push(`Authors: ${authorNames}${paper.authors.length > 3 ? ' et al.' : ''}`);
                        }
                        if (paper.year) results.push(`Year: ${paper.year}`);
                        if (paper.citationCount) results.push(`Citations: ${paper.citationCount}`);
                        if (typeof paper.abstract === 'string' && paper.abstract) {
                            results.push(`Abstract: ${paper.abstract.substring(0, 250)}${paper.abstract.length > 250 ? '...' : ''}`);
                        }
                        if (paper.url) results.push(`🔗 ${paper.url}`);
                        results.push('');
                    }
                    // A bare header with no papers is not a result.
                    resolve(count > 0 ? results.join('\n') : '');
                } catch (e) {
                    // Best-effort source: a malformed reply just means "no results".
                    resolve('');
                }
            });
        });

        req.on('error', () => resolve(''));
        req.on('timeout', () => { req.destroy(); resolve(''); });
        req.end();
    });
}
6466
+
6467
/**
 * Search Wikidata entities via `wbsearchentities` (100% free, no API key).
 *
 * The promise never rejects: invalid input, network errors, timeouts and
 * unparseable responses all resolve to an empty string.
 *
 * @param {string} query - Search query
 * @returns {Promise<string>} Markdown list of matching entities, or '' when there are none
 */
function searchWikidata(query) {
    return new Promise((resolve) => {
        if (!query || typeof query !== 'string') {
            resolve('');
            return;
        }

        const options = {
            hostname: 'www.wikidata.org',
            path: `/w/api.php?action=wbsearchentities&search=${encodeURIComponent(query)}&language=en&limit=5&format=json`,
            method: 'GET',
            timeout: 8000,
            headers: {
                'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)'
            }
        };

        const req = https.request(options, (res) => {
            let data = '';
            // Decode at the stream level: without an encoding every chunk is a
            // Buffer that `+=` stringifies on its own, which corrupts a
            // multi-byte UTF-8 character split across two chunks.
            res.setEncoding('utf8');
            res.on('data', (chunk) => { data += chunk; });
            // If the body fails mid-stream, 'end' never fires; settle here so
            // the promise cannot stay pending.
            res.on('error', () => resolve(''));
            res.on('end', () => {
                try {
                    const json = JSON.parse(data);
                    if (!json.search || !Array.isArray(json.search) || json.search.length === 0) {
                        resolve('');
                        return;
                    }

                    const results = ['**Wikidata Structured Knowledge:**\n'];
                    let count = 0;
                    for (const item of json.search) {
                        if (!item) continue;
                        // Fall back to the entity id so an entity without a
                        // label is not rendered as "undefined".
                        const heading = item.label || item.id;
                        if (!heading) continue;
                        count++;
                        results.push(`**${count}. ${heading}**`);
                        if (item.description) results.push(item.description);
                        if (typeof item.url === 'string' && item.url) {
                            // The API returns protocol-relative URLs ("//www.wikidata.org/...");
                            // make them absolute so the link works outside a browser page.
                            const link = item.url.startsWith('//') ? `https:${item.url}` : item.url;
                            results.push(`🔗 ${link}`);
                        }
                        results.push('');
                    }
                    // A bare header with no entities is not a result.
                    resolve(count > 0 ? results.join('\n') : '');
                } catch (e) {
                    // Best-effort source: a malformed reply just means "no results".
                    resolve('');
                }
            });
        });

        req.on('error', () => resolve(''));
        req.on('timeout', () => { req.destroy(); resolve(''); });
        req.end();
    });
}
6518
+
6519
/**
 * Search the Internet Archive for historical content (100% free, no API key).
 *
 * The promise never rejects: invalid input, network errors, timeouts and
 * unparseable responses all resolve to an empty string.
 *
 * @param {string} query - Search query
 * @returns {Promise<string>} Markdown list of titled items, or '' when there are none
 */
function searchInternetArchive(query) {
    return new Promise((resolve) => {
        if (!query || typeof query !== 'string') {
            resolve('');
            return;
        }

        const options = {
            hostname: 'archive.org',
            path: `/advancedsearch.php?q=${encodeURIComponent(query)}&fl=identifier,title,description,date,mediatype&rows=5&output=json`,
            method: 'GET',
            timeout: 10000,
            headers: {
                'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)'
            }
        };

        const req = https.request(options, (res) => {
            let data = '';
            // Decode at the stream level: without an encoding every chunk is a
            // Buffer that `+=` stringifies on its own, which corrupts a
            // multi-byte UTF-8 character split across two chunks.
            res.setEncoding('utf8');
            res.on('data', (chunk) => { data += chunk; });
            // If the body fails mid-stream, 'end' never fires; settle here so
            // the promise cannot stay pending.
            res.on('error', () => resolve(''));
            res.on('end', () => {
                try {
                    const json = JSON.parse(data);
                    const docs = json.response && json.response.docs;
                    if (!Array.isArray(docs) || docs.length === 0) {
                        resolve('');
                        return;
                    }

                    const results = ['**Internet Archive Historical Content:**\n'];
                    // Number only the entries actually emitted, so skipping an
                    // untitled item does not leave a gap in the list.
                    let count = 0;
                    for (const doc of docs) {
                        if (!doc || !doc.title) continue;
                        count++;
                        results.push(`**${count}. ${doc.title}**`);
                        // `description` may be a string or an array of strings.
                        // Only use it when a non-empty string is available, so
                        // one odd record (e.g. an empty array) cannot throw and
                        // discard every other result.
                        const desc = Array.isArray(doc.description) ? doc.description[0] : doc.description;
                        if (typeof desc === 'string' && desc) {
                            results.push(desc.substring(0, 200));
                        }
                        if (doc.date) results.push(`Date: ${doc.date}`);
                        if (doc.mediatype) results.push(`Type: ${doc.mediatype}`);
                        if (doc.identifier) results.push(`🔗 https://archive.org/details/${doc.identifier}`);
                        results.push('');
                    }
                    // A bare header with no items is not a result.
                    resolve(count > 0 ? results.join('\n') : '');
                } catch (e) {
                    // Best-effort source: a malformed reply just means "no results".
                    resolve('');
                }
            });
        });

        req.on('error', () => resolve(''));
        req.on('timeout', () => { req.destroy(); resolve(''); });
        req.end();
    });
}
6577
+
6578
  /**
6579
  * Get user location from IP using IP-API (100% free, no API key)
6580
  * Note: This works for server-side detection, not client IP
uploads/.gitkeep ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # This file ensures the uploads directory is tracked by git
2
+ # Uploaded files are ignored via .gitignore