/* ============================================
   IVY'S RSS HUB — RSS Parser Module
   Fetches and parses RSS/Atom feeds
   Uses Dexie (IndexedDB) for caching 🌿
   ============================================ */

/**
 * RSSParser - Handles fetching and parsing RSS/Atom feeds.
 *
 * Feeds are fetched through a list of CORS proxy URL builders taken from
 * `window.FeedsConfig.CORS_PROXIES`, cached in memory for the session and
 * mirrored to IndexedDB (via Dexie) so a page reload can reuse fresh entries.
 */
class RSSParser {
  constructor() {
    this.corsProxies = window.FeedsConfig.CORS_PROXIES;
    this.currentProxyIndex = 0;
    this.cache = new Map(); // Memory cache for current session
    this.cacheTimeout = 5 * 60 * 1000; // 5 minutes cache
    this.maxArticlesPerFeed = 25; // Limit parsing to 25 articles per feed
    this.dbReady = false;
    this.db = null;

    // Keep the initialisation promise so that fetches issued right after
    // construction can wait for the persisted cache instead of racing it.
    // initDatabase() handles its own errors, so this promise never rejects.
    this.ready = this.initDatabase();
  }

  /**
   * Initialize Dexie database for persistent caching.
   * Failures are logged and leave `dbReady` false; they are never thrown.
   * @returns {Promise<void>}
   */
  async initDatabase() {
    try {
      this.db = new Dexie("IvyRSSHubCache");
      // NOTE(review): listing "data" here makes Dexie index the whole XML
      // payload. Dropping it needs a schema version bump, so it is left as is.
      this.db.version(1).stores({ feeds: "url, data, timestamp" });

      // Load cached feeds into memory
      await this.loadPersistentCache();
      this.dbReady = true;
      console.log("📦 Dexie database ready");
    } catch (e) {
      console.warn("Failed to init Dexie:", e);
      this.dbReady = false;
    }
  }

  /**
   * Load cache from IndexedDB into memory.
   * Entries younger than two hours are copied into the memory cache; older
   * entries are deleted from IndexedDB.
   * @returns {Promise<void>}
   */
  async loadPersistentCache() {
    try {
      if (!this.db) return;

      const maxAge = 2 * 60 * 60 * 1000; // 2 hours max
      const now = Date.now();

      // Get all cached feeds
      const cached = await this.db.feeds.toArray();

      // Load valid ones into memory cache
      let loaded = 0;
      for (const item of cached) {
        if (now - item.timestamp < maxAge) {
          this.cache.set(item.url, { data: item.data, timestamp: item.timestamp });
          loaded++;
        } else {
          // Delete old entries
          await this.db.feeds.delete(item.url);
        }
      }

      if (loaded > 0) {
        console.log(`📦 Loaded ${loaded} cached feeds from IndexedDB`);
      }
    } catch (e) {
      console.warn("Failed to load cache:", e);
    }
  }

  /**
   * Save a feed to IndexedDB (best effort; failures are only logged).
   * @param {string} url - Feed URL, used as the primary key
   * @param {string} data - Raw XML text
   * @param {number} timestamp - Fetch time in epoch milliseconds
   * @returns {Promise<void>}
   */
  async saveToPersistentCache(url, data, timestamp) {
    try {
      // Wait for initialisation so early saves are not silently dropped.
      await this.ready;
      if (!this.db || !this.dbReady) return;
      await this.db.feeds.put({ url: url, data: data, timestamp: timestamp });
    } catch (e) {
      console.warn("Failed to save to cache:", e);
    }
  }

  /**
   * Fetch feed with CORS proxy fallback
   * @param {string} feedUrl - The RSS feed URL
   * @returns {Promise<{data: string, fromCache: boolean}>} - Raw XML content
   * @throws {Error} When no proxy returns an XML-looking response
   */
  async fetchWithProxy(feedUrl) {
    // Make sure the persisted cache has been loaded before consulting it;
    // otherwise the first fetches after page load always miss.
    await this.ready;

    // Check cache first
    const cached = this.cache.get(feedUrl);
    if (cached && Date.now() - cached.timestamp < this.cacheTimeout) {
      return { data: cached.data, fromCache: true };
    }

    let lastError = null;

    // Try each proxy in sequence, starting with the last one that worked
    for (let i = 0; i < this.corsProxies.length; i++) {
      const proxyIndex = (this.currentProxyIndex + i) % this.corsProxies.length;
      const proxyUrl = this.corsProxies[proxyIndex](feedUrl);

      try {
        const response = await fetch(proxyUrl, {
          headers: {
            Accept: "application/rss+xml, application/xml, text/xml, application/atom+xml"
          }
        });

        if (!response.ok) {
          throw new Error(`HTTP ${response.status}`);
        }

        const text = await response.text();

        // NOTE(review): from this check through the opening of parseRSS()
        // the reviewed copy of this file was missing (markup-like text had
        // been stripped). The code below was reconstructed from its call
        // sites — verify it against the canonical version.

        // Validate it's XML
        const looksLikeXml =
          text.includes("<?xml") ||
          text.includes("<rss") ||
          text.includes("<feed") ||
          text.includes("<rdf:RDF");
        if (!looksLikeXml) {
          throw new Error("Response is not XML");
        }

        // Cache the response in memory and (fire-and-forget) in IndexedDB.
        // saveToPersistentCache() never rejects, so it is safe not to await.
        const timestamp = Date.now();
        this.cache.set(feedUrl, { data: text, timestamp: timestamp });
        this.saveToPersistentCache(feedUrl, text, timestamp);

        // Remember the working proxy so the next fetch tries it first
        this.currentProxyIndex = proxyIndex;

        return { data: text, fromCache: false };
      } catch (error) {
        lastError = error;
        console.warn(`Proxy ${proxyIndex + 1} failed for ${feedUrl}:`, error);
      }
    }

    const reason = lastError?.message ?? "no CORS proxies configured";
    throw new Error(`All proxies failed for ${feedUrl}: ${reason}`);
  }

  /**
   * Parse raw XML text and dispatch to the RSS or Atom parser.
   * NOTE(review): reconstructed — see the note in fetchWithProxy().
   * @param {string} xmlText - Raw feed XML
   * @returns {{title: string, description: string, link: string, items: Array<Object>}}
   * @throws {Error} On malformed XML or an unrecognised feed format
   */
  parseXML(xmlText) {
    const doc = new DOMParser().parseFromString(xmlText, "text/xml");

    // DOMParser reports XML errors as a <parsererror> element, not a throw
    if (doc.querySelector("parsererror")) {
      throw new Error("XML parsing error");
    }

    if (doc.querySelector("channel")) {
      return this.parseRSS(doc);
    }
    if (doc.querySelector("feed")) {
      return this.parseAtom(doc);
    }
    throw new Error("Unknown feed format");
  }

  /**
   * Parse RSS format.
   * NOTE(review): the opening of this method (up to the item mapping) is
   * reconstructed — see the note in fetchWithProxy().
   * @param {Document} doc - Parsed XML document
   * @returns {{title: string, description: string, link: string, items: Array<Object>}}
   * @throws {Error} When the document has no channel element
   */
  parseRSS(doc) {
    const channel = doc.querySelector("channel");
    if (!channel) {
      throw new Error("Invalid RSS: no channel element");
    }

    // Limit to maxArticlesPerFeed for performance
    const items = Array.from(doc.querySelectorAll("item"))
      .slice(0, this.maxArticlesPerFeed)
      .map(item => ({
        title: this.getTextContent(item, "title"),
        link: this.getTextContent(item, "link"),
        description: this.cleanDescription(this.getTextContent(item, "description")),
        pubDate: this.parseDate(this.getTextContent(item, "pubDate")),
        author: this.getTextContent(item, "author") || this.getDCCreator(item)
      }));

    return {
      title: this.getTextContent(channel, "title"),
      description: this.getTextContent(channel, "description"),
      link: this.getTextContent(channel, "link"),
      // Items without both a title and a link are dropped
      items: items.filter(item => item.title && item.link)
    };
  }

  /**
   * Get dc:creator content (handles namespace properly)
   * @param {Element} item - An RSS item element
   * @returns {string} Trimmed creator text, or "" when absent
   */
  getDCCreator(item) {
    // Try different approaches for dc:creator namespace
    const creator =
      item.getElementsByTagName("dc:creator")[0] ||
      item.getElementsByTagName("creator")[0];
    return creator?.textContent?.trim() || "";
  }

  /**
   * Parse Atom format (supports arXiv API format)
   * @param {Document} doc - Parsed XML document
   * @returns {{title: string, description: string, link: string, items: Array<Object>}}
   * @throws {Error} When the document has no feed element
   */
  parseAtom(doc) {
    const feed = doc.querySelector("feed");
    if (!feed) {
      throw new Error("Invalid Atom: no feed element");
    }

    // Limit to maxArticlesPerFeed for performance
    const items = Array.from(doc.querySelectorAll("entry"))
      .slice(0, this.maxArticlesPerFeed)
      .map(entry => {
        // Atom links can be in format <link href="..." />
        const linkElement =
          entry.querySelector('link[rel="alternate"]') || entry.querySelector("link");
        const link = linkElement?.getAttribute("href") || this.getTextContent(entry, "link");

        // Handle multiple authors (common in arXiv API)
        const authorElements = entry.querySelectorAll("author name");
        let author = "";
        if (authorElements.length > 0) {
          // Join first 3 authors, add "et al." if more
          const names = Array.from(authorElements)
            .map(el => el.textContent?.trim())
            .filter(Boolean);
          author = names.slice(0, 3).join(", ");
          if (names.length > 3) author += " et al.";
        }

        return {
          title: this.getTextContent(entry, "title"),
          link: link,
          description: this.cleanDescription(
            this.getTextContent(entry, "summary") || this.getTextContent(entry, "content")
          ),
          pubDate: this.parseDate(
            this.getTextContent(entry, "published") || this.getTextContent(entry, "updated")
          ),
          author: author
        };
      });

    const titleLink = feed.querySelector('link[rel="alternate"]') || feed.querySelector("link");

    return {
      title: this.getTextContent(feed, "title"),
      description: this.getTextContent(feed, "subtitle"),
      link: titleLink?.getAttribute("href") || "",
      items: items.filter(item => item.title && item.link)
    };
  }

  /**
   * Helper: Get text content of an element
   * @param {Element|Document} parent - Node to search within
   * @param {string} selector - CSS selector for the wanted descendant
   * @returns {string} Trimmed text of the first match, or "" when none
   */
  getTextContent(parent, selector) {
    const element = parent.querySelector(selector);
    return element?.textContent?.trim() || "";
  }

  /**
   * Helper: Parse date string to Date object
   * @param {string} dateStr - Date text from the feed
   * @returns {Date|null} A valid Date, or null for empty/unparseable input
   */
  parseDate(dateStr) {
    if (!dateStr) return null;
    const date = new Date(dateStr);
    return Number.isNaN(date.getTime()) ? null : date;
  }

  /**
   * Helper: Clean HTML from description
   * Uses DOMParser for safer HTML stripping (avoids script execution)
   * @param {string} html - Description markup from the feed
   * @returns {string} Plain text, whitespace-normalised, at most 200
   *   characters plus "..." when truncated
   */
  cleanDescription(html) {
    if (!html) return "";

    // Use DOMParser for safer HTML parsing (doesn't execute scripts)
    try {
      const doc = new DOMParser().parseFromString(html, "text/html");

      // Markup with no text (e.g. a lone <img>) yields "". The raw markup
      // must not be returned in that case: callers expect plain text, and
      // echoing untrusted tags back would defeat the stripping.
      let text = doc.body.textContent || "";

      // Trim and normalize whitespace
      text = text.trim().replace(/\s+/g, " ");

      // Limit length
      if (text.length > 200) {
        text = text.substring(0, 200) + "...";
      }
      return text;
    } catch {
      // Ultimate fallback: just strip tags with regex
      return html.replace(/<[^>]*>/g, "").substring(0, 200);
    }
  }

  /**
   * Main method: Fetch and parse a feed.
   * Never rejects: failures are reported through `status: "error"`.
   * @param {Object} feedConfig - Feed configuration object (reads `url`, `name`)
   * @returns {Promise<Object>} - feedConfig plus `feed`, `status`,
   *   `lastFetched` and either `fromCache` or `error`
   */
  async fetchFeed(feedConfig) {
    try {
      const { data: xmlText, fromCache } = await this.fetchWithProxy(feedConfig.url);
      const parsed = this.parseXML(xmlText);
      return {
        ...feedConfig,
        feed: parsed,
        status: "success",
        fromCache: fromCache,
        lastFetched: new Date()
      };
    } catch (error) {
      console.error(`Failed to fetch ${feedConfig.name}:`, error);
      return {
        ...feedConfig,
        feed: null,
        status: "error",
        error: error.message,
        lastFetched: new Date()
      };
    }
  }

  /**
   * Fetch multiple feeds with progressive callback.
   * Feeds are fetched in batches of 5 with a 1 second pause between batches.
   * NOTE(review): the original comments cite arXiv's 3 second rule, which a
   * 1 second pause does not satisfy — confirm the intended delay.
   * @param {Array} feedConfigs - Array of feed configurations
   * @param {Function} onProgress - Called after each batch with
   *   (resultsSoFar, completedCount, totalCount); resultsSoFar is the same
   *   array instance that is eventually returned
   * @returns {Promise<Array>} - Array of parsed feeds, in input order
   */
  async fetchAllFeedsProgressive(feedConfigs, onProgress) {
    const results = [];
    const batchSize = 5; // Fetch 5 at a time for balance
    const batchDelay = 1000; // 1 second delay between batches to be nice to APIs

    for (let i = 0; i < feedConfigs.length; i += batchSize) {
      const batch = feedConfigs.slice(i, i + batchSize);
      const batchResults = await Promise.allSettled(
        batch.map(config => this.fetchFeed(config))
      );

      batchResults.forEach((result, idx) => {
        if (result.status === "fulfilled") {
          results.push(result.value);
        } else {
          results.push({
            ...batch[idx],
            feed: null,
            status: "error",
            error: result.reason?.message || "Unknown error",
            lastFetched: new Date()
          });
        }
      });

      // Call progress callback
      if (onProgress) {
        onProgress(results, i + batch.length, feedConfigs.length);
      }

      // Only delay if there are more batches to process
      if (i + batchSize < feedConfigs.length) {
        await new Promise(resolve => setTimeout(resolve, batchDelay));
      }
    }

    return results;
  }

  /**
   * Fetch multiple feeds in parallel (legacy)
   * @param {Array} feedConfigs - Array of feed configurations
   * @returns {Promise<Array>} - Array of parsed feeds, in input order
   */
  async fetchAllFeeds(feedConfigs) {
    const results = await Promise.allSettled(
      feedConfigs.map(config => this.fetchFeed(config))
    );

    return results.map((result, index) => {
      if (result.status === "fulfilled") {
        return result.value;
      }
      return {
        ...feedConfigs[index],
        feed: null,
        status: "error",
        error: result.reason?.message || "Unknown error",
        lastFetched: new Date()
      };
    });
  }

  /**
   * Clear the cache (memory only, keeps IndexedDB)
   */
  clearCache() {
    this.cache.clear();
  }

  /**
   * Clear all cache including IndexedDB
   * @returns {Promise<void>}
   */
  async clearAllCache() {
    this.cache.clear();
    try {
      if (this.db) {
        await this.db.feeds.clear();
        console.log("🗑️ IndexedDB cache cleared");
      }
      // Also clean up old localStorage if it exists
      localStorage.removeItem("ivy-rss-cache");
    } catch (e) {
      console.warn("Failed to clear cache:", e);
    }
  }
}

// Export for use in app (guarded so the module can load outside a browser)
if (typeof window !== "undefined") {
  window.RSSParser = RSSParser;
}