borsa / nextjs-app /src /lib /google-news-rss.ts
veteroner's picture
fix: read eligibility from scan work snapshot
2cecfa4
/**
* Google News RSS — direct RSS fetch for Turkish (BIST) and US stock market news.
*
* No backend proxy required; works standalone.
* Port of HuggingFace Space `data/news_api.py` to TypeScript.
*/
import { USER_AGENT } from '@/lib/runtime-config'
/**
 * One news entry parsed from a Google News RSS `<item>` element,
 * as produced by `fetchGoogleNewsRSS`.
 */
export interface RSSNewsItem {
/** `gnews:`-prefixed uppercase hex hash derived from title, publication time and source. */
id: string
/** Symbol passed by the caller via `opts.symbol`, or `null` when none was given. */
symbol: string | null
/** Text of the item's `<title>` after `stripHtml`. */
title: string
/** Text of the item's `<description>` after `stripHtml`. */
content: string
/** Text of the item's `<source>` element; falls back to `'Google News'` when empty. */
source: string
/** Publication time as an ISO-8601 string (`Date#toISOString`), parsed from `<pubDate>`. */
published_at: string
/** Content of the item's `<link>` element. */
url: string
/** Always `null` when produced by this module; filled downstream. */
sentiment: string | null // filled downstream
/** Always `null` when produced by this module. */
sentiment_score: number | null
}
// ── helpers ──────────────────────────────────────────────────────
/** Anything tag-shaped in the raw input (literal markup, CDATA wrappers, comments). */
const TAG_RE = /<[^>]+>/g
/** Runs of whitespace; `\s` also covers the no-break space produced by `&nbsp;`. */
const WS_RE = /\s+/g
/**
 * Real element tags only (`<a …>`, `</font>`). Deliberately stricter than TAG_RE:
 * it is applied to already-decoded text, where a bare `<` (as in "a < b") is legitimate.
 */
const ELEMENT_RE = /<\/?[a-z][^>]*>/gi
/** One character reference: named (`&amp;`), decimal (`&#39;`) or hex (`&#x27;`). */
const ENTITY_RE = /&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi
/** Named references decoded by `decodeEntities`; anything else is left verbatim. */
const NAMED_ENTITIES: Readonly<Record<string, string>> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
  nbsp: '\u00a0',
}

/**
 * Decodes character references in a single pass.
 *
 * A single pass matters: chained `.replace()` calls would turn `&amp;lt;` into `<`
 * (two levels of decoding) instead of the correct `&lt;`.
 * Unknown names and out-of-range code points are returned unchanged.
 */
function decodeEntities(text: string): string {
  return text.replace(ENTITY_RE, (entity: string, body: string): string => {
    if (body.charAt(0) === '#') {
      const isHex = body.charAt(1).toLowerCase() === 'x'
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10)
      // String.fromCodePoint throws outside 0..0x10FFFF; NaN also fails this check.
      if (!(codePoint > 0 && codePoint <= 0x10ffff)) return entity
      return String.fromCodePoint(codePoint)
    }
    return NAMED_ENTITIES[body.toLowerCase()] ?? entity
  })
}

/**
 * Reduces an RSS text field to plain, single-spaced text.
 *
 * Handles both plain fields (`AT&amp;T`) and fields that carry escaped HTML
 * (`&lt;a href="…"&gt;Title&lt;/a&gt;&amp;nbsp;…`): markup is removed and
 * character references are decoded at each layer.
 *
 * @param text Raw element content; empty input yields `''`.
 * @returns The text with tags removed, entities decoded, whitespace collapsed and trimmed.
 */
function stripHtml(text: string): string {
  if (!text) return ''
  // Layer 1: drop literal markup, then undo the XML-level escaping.
  let plain = decodeEntities(text.replace(TAG_RE, ' '))
  // Layer 2: if unescaping revealed element tags, the payload was escaped HTML,
  // which carries its own entity layer (e.g. `&amp;nbsp;` -> `&nbsp;` -> NBSP).
  const withoutElements = plain.replace(ELEMENT_RE, ' ')
  if (withoutElements !== plain) {
    plain = decodeEntities(withoutElements)
  }
  return plain.replace(WS_RE, ' ').trim()
}
/**
 * Converts an RSS `pubDate` string into an ISO-8601 timestamp.
 *
 * @param dateStr Date text as found in the feed.
 * @returns `Date#toISOString()` output, or `''` when the input is empty or unparseable.
 */
function parseRFC822(dateStr: string): string {
  if (dateStr) {
    try {
      const parsed = new Date(dateStr)
      const epochMs = parsed.getTime()
      // An unparseable string produces an Invalid Date whose time value is NaN.
      if (!Number.isNaN(epochMs)) return parsed.toISOString()
    } catch {
      // Fall through to the empty result.
    }
  }
  return ''
}
/**
 * Builds a stable identifier for a news item from its title, publication time and source.
 *
 * The three parts are joined with `|` and folded into a 32-bit hash
 * (`hash * 31 + charCode`, wrapped to int32 on every step).
 *
 * @returns `gnews:` followed by the unsigned hash in uppercase hexadecimal.
 */
function deterministicId(title: string, publishedAt: string, source: string): string {
  const key = title + '|' + publishedAt + '|' + source
  let hash = 0
  for (let pos = 0; pos < key.length; pos += 1) {
    // Math.imul(hash, 31) is the int32 product, congruent to (hash << 5) - hash.
    hash = (Math.imul(hash, 31) + key.charCodeAt(pos)) | 0
  }
  const unsigned = hash >>> 0
  return 'gnews:' + unsigned.toString(16).toUpperCase()
}
// ── RSS URL building ─────────────────────────────────────────────
/**
 * Builds the Google News RSS search URL for a query.
 *
 * @param query Search terms; a blank query falls back to a market-wide default.
 * @param market `'us'` selects the English/US edition, anything else the Turkish edition.
 * @returns The search URL with the query URI-encoded.
 */
function googleNewsUrl(query: string, market: 'bist' | 'us' = 'bist'): string {
  const isUs = market === 'us'
  let terms = (query || '').trim()
  if (!terms) {
    terms = isUs ? 'stock market OR NYSE OR NASDAQ' : 'borsa istanbul OR BIST'
  }
  const encodedTerms = encodeURIComponent(terms)
  return isUs
    ? `https://news.google.com/rss/search?q=${encodedTerms}&hl=en-US&gl=US&ceid=US:en`
    : `https://news.google.com/rss/search?q=${encodedTerms}&hl=tr-TR&gl=TR&ceid=TR:tr`
}
// ── Simple XML extraction (no external deps) ────────────────────
/**
 * Returns the trimmed text content of the first `<tag>` element in `xml`.
 *
 * - The tag name is matched exactly (case-insensitively): `title` does not match `<titles>`.
 * - A self-closing first element (`<link/>`) yields `''` instead of pairing with a
 *   later, unrelated closing tag.
 * - A `<![CDATA[ … ]]>` wrapper around the whole content is removed.
 *
 * @param xml XML fragment to search.
 * @param tag Element name, treated literally (regex metacharacters are escaped).
 * @returns The element's inner text, or `''` when the element is absent or unterminated.
 */
function extractTagContent(xml: string, tag: string): string {
  const name = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  // The lookahead forces the name to end here, so longer names sharing the prefix are skipped.
  const openMatch = new RegExp(`<${name}(?=[\\s/>])[^>]*>`, 'i').exec(xml)
  if (!openMatch) return ''
  if (openMatch[0].endsWith('/>')) return ''

  const start = openMatch.index + openMatch[0].length
  const closeMatch = new RegExp(`</${name}\\s*>`, 'i').exec(xml.slice(start))
  if (!closeMatch) return ''

  const inner = xml.slice(start, start + closeMatch.index).trim()
  const cdata = /^<!\[CDATA\[([\s\S]*)\]\]>$/.exec(inner)
  return cdata ? (cdata[1] ?? '').trim() : inner
}
/**
 * Reads the publisher name from an item's `<source url="…">Name</source>` element.
 *
 * @param itemXml XML of a single `<item>`.
 * @returns The trimmed element text, or `''` when no such element with plain-text content exists.
 */
function extractSourceText(itemXml: string): string {
  const found = itemXml.match(/<source[^>]*>([^<]*)<\/source>/i)
  if (found === null) return ''
  return found[1].trim()
}
/**
 * Splits a feed into its `<item>…</item>` fragments, in document order.
 *
 * Only the exact opening tag `<item>` (no attributes) is recognised. Scanning
 * stops at the first opening tag that has no closing tag after it.
 *
 * @param xml Full RSS document text.
 * @returns Each fragment including its opening and closing tags.
 */
function extractItems(xml: string): string[] {
  const OPEN = '<item>'
  const CLOSE = '</item>'
  const found: string[] = []
  let cursor = 0
  for (;;) {
    const from = xml.indexOf(OPEN, cursor)
    if (from < 0) break
    const closeAt = xml.indexOf(CLOSE, from)
    if (closeAt < 0) break
    cursor = closeAt + CLOSE.length
    found.push(xml.slice(from, cursor))
  }
  return found
}
// ── In-memory cache (per-process) ────────────────────────────────
/** Parsed results keyed by market, symbol, query and limit; `at` is the request start time (ms). */
const CACHE = new Map<string, { at: number; items: RSSNewsItem[] }>()
const CACHE_TTL_MS = 10 * 60 * 1000 // 10 minutes
/** Cap on cached queries so a long-lived process cannot grow the map without bound. */
const CACHE_MAX_ENTRIES = 500

/** Drops expired entries, then the oldest remaining ones, until a new entry fits under the cap. */
function pruneCache(now: number): void {
  for (const [key, entry] of CACHE) {
    if (now - entry.at >= CACHE_TTL_MS) CACHE.delete(key)
  }
  // A Map iterates in insertion order, so the first keys are the oldest entries.
  for (const key of CACHE.keys()) {
    if (CACHE.size < CACHE_MAX_ENTRIES) break
    CACHE.delete(key)
  }
}

/** Converts one `<item>` fragment into a news item, or `null` when title or date is unusable. */
function parseRssItem(raw: string, symbol: string | null): RSSNewsItem | null {
  const title = stripHtml(extractTagContent(raw, 'title'))
  const publishedAt = parseRFC822(extractTagContent(raw, 'pubDate').trim())
  if (!title || !publishedAt) return null

  const rawSource = extractSourceText(raw)
  return {
    // The id keeps hashing the undecoded source text so previously issued ids stay stable.
    id: deterministicId(title, publishedAt, rawSource || 'Google News'),
    symbol,
    title,
    content: stripHtml(extractTagContent(raw, 'description')),
    // Element text is XML-escaped (`S&amp;P Global`); decode it like title and description.
    source: stripHtml(rawSource) || 'Google News',
    published_at: publishedAt,
    // A URL with several query parameters arrives as `…?a=1&amp;b=2`.
    url: stripHtml(extractTagContent(raw, 'link')),
    sentiment: null,
    sentiment_score: null,
  }
}

// ── Main fetch function ──────────────────────────────────────────
/**
 * Fetches and parses a Google News RSS search feed.
 *
 * Successful results are cached per process for `CACHE_TTL_MS`. The returned array
 * is the cached instance, so mutations by the caller are visible to later cache hits.
 * This is best-effort: a non-OK response, network error or timeout yields `[]`
 * (not cached) rather than throwing.
 *
 * @param query Search terms; a blank query falls back to a market-wide default.
 * @param opts.symbol Value copied into every item's `symbol` field (default `null`).
 * @param opts.limit Maximum number of items, clamped to 1..200 (default 20; `NaN` is treated as 20).
 * @param opts.timeoutMs Abort the request after this many milliseconds (default 12000).
 * @param opts.market `'bist'` (default) or `'us'`; selects the feed edition.
 * @returns Parsed items in feed order; items without a title or a parseable date are skipped.
 */
export async function fetchGoogleNewsRSS(
  query: string,
  opts?: { symbol?: string; limit?: number; timeoutMs?: number; market?: 'bist' | 'us' }
): Promise<RSSNewsItem[]> {
  const symbol = opts?.symbol ?? null
  const requestedLimit = opts?.limit ?? 20
  // NaN would pass straight through Math.min/Math.max and disable the cap below.
  const limit = Number.isNaN(requestedLimit) ? 20 : Math.max(1, Math.min(requestedLimit, 200))
  const timeoutMs = opts?.timeoutMs ?? 12000
  const market = opts?.market ?? 'bist'

  const cacheKey = `gnews::${market}::${symbol || ''}::${query}::${limit}`
  const now = Date.now()
  const cached = CACHE.get(cacheKey)
  if (cached) {
    if (now - cached.at < CACHE_TTL_MS) return cached.items
    // Expired: remove it now so it is not kept around if the refresh below fails.
    CACHE.delete(cacheKey)
  }

  const url = googleNewsUrl(query, market)
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), timeoutMs)
  let xml: string
  try {
    const resp = await fetch(url, {
      signal: controller.signal,
      headers: {
        'User-Agent': USER_AGENT,
        Accept: 'application/rss+xml,application/xml,text/xml,*/*',
      },
    })
    if (!resp.ok) return []
    xml = await resp.text()
  } catch {
    // Deliberate best-effort: callers get an empty list instead of an exception.
    return []
  } finally {
    clearTimeout(timer)
  }

  const items: RSSNewsItem[] = []
  for (const raw of extractItems(xml)) {
    if (items.length >= limit) break
    const item = parseRssItem(raw, symbol)
    if (item) items.push(item)
  }

  pruneCache(now)
  CACHE.set(cacheKey, { at: now, items })
  return items
}
// ── Convenience wrappers ─────────────────────────────────────────
/**
 * Fetches general market headlines for the given market.
 *
 * @param limit Maximum number of items to return (default 20).
 * @param market `'us'` for US market news; any other value uses the BIST query.
 * @returns Items from `fetchGoogleNewsRSS` for the market-wide query.
 */
export async function getMarketNews(limit: number = 20, market: 'bist' | 'us' = 'bist'): Promise<RSSNewsItem[]> {
  const isUs = market === 'us'
  const query = isUs
    ? 'stock market OR S&P 500 OR NASDAQ OR NYSE'
    : 'borsa istanbul OR BIST OR Viop'
  return fetchGoogleNewsRSS(query, { limit, market: isUs ? 'us' : 'bist' })
}
/**
 * Fetches headlines mentioning a single ticker symbol.
 *
 * @param symbol Ticker; trimmed and upper-cased before use. A blank symbol yields `[]` without a request.
 * @param limit Maximum number of items to return (default 20).
 * @param market `'us'` uses English stock/shares/earnings terms; any other value uses the BIST terms.
 * @returns Items from `fetchGoogleNewsRSS`, each tagged with the normalised symbol.
 */
export async function getSymbolNews(symbol: string, limit: number = 20, market: 'bist' | 'us' = 'bist'): Promise<RSSNewsItem[]> {
  const ticker = (symbol || '').trim().toUpperCase()
  if (ticker === '') return []
  const isUs = market === 'us'
  const query = isUs
    ? `${ticker} stock OR ${ticker} shares OR ${ticker} earnings`
    : `${ticker} hisse OR ${ticker} BIST`
  return fetchGoogleNewsRSS(query, { symbol: ticker, limit, market: isUs ? 'us' : 'bist' })
}