trenches / src /lib /data-sources /allowed-domains.ts
Codex
sync main snapshot for HF Space
1794757
/**
 * Hand-maintained list of hostnames that seed the source allowlist.
 *
 * Entries are not matched verbatim: each one is passed through
 * `normalizeDomain` when `ALLOWED_SOURCE_DOMAINS` is built, which lowercases
 * it and drops a leading `www.`. `isAllowedDomain` additionally accepts any
 * subdomain of a normalized entry, so an entry such as "www.gov.uk" ends up
 * permitting "gov.uk" and every host beneath it.
 *
 * `as const` keeps the array readonly with literal element types.
 */
const RAW_ALLOWED_DOMAINS = [
"feeds.bbci.co.uk",
"www.theguardian.com",
"feeds.npr.org",
"news.google.com",
"www.aljazeera.com",
"rss.cnn.com",
"hnrss.org",
"feeds.arstechnica.com",
"www.theverge.com",
"www.cnbc.com",
"feeds.marketwatch.com",
"www.defenseone.com",
"breakingdefense.com",
"www.bellingcat.com",
"techcrunch.com",
"huggingface.co",
"www.technologyreview.com",
"rss.arxiv.org",
"export.arxiv.org",
"www.federalreserve.gov",
"www.sec.gov",
"www.whitehouse.gov",
"www.state.gov",
"www.defense.gov",
"home.treasury.gov",
"www.justice.gov",
"tools.cdc.gov",
"www.fema.gov",
"www.dhs.gov",
"www.thedrive.com",
"krebsonsecurity.com",
"finance.yahoo.com",
"thediplomat.com",
"venturebeat.com",
"foreignpolicy.com",
"www.ft.com",
"openai.com",
"www.reutersagency.com",
"feeds.reuters.com",
"asia.nikkei.com",
"www.cfr.org",
"www.csis.org",
"www.politico.com",
"www.brookings.edu",
"layoffs.fyi",
"www.defensenews.com",
"www.militarytimes.com",
"taskandpurpose.com",
"news.usni.org",
"www.oryxspioenkop.com",
"www.gov.uk",
"www.foreignaffairs.com",
"www.atlanticcouncil.org",
"www.zdnet.com",
"www.techmeme.com",
"www.darkreading.com",
"www.schneier.com",
"rss.politico.com",
"www.anandtech.com",
"www.tomshardware.com",
"www.semianalysis.com",
"feed.infoq.com",
"thenewstack.io",
"devops.com",
"dev.to",
"lobste.rs",
"changelog.com",
"seekingalpha.com",
"news.crunchbase.com",
"www.saastr.com",
"feeds.feedburner.com",
"www.producthunt.com",
"www.axios.com",
"api.axios.com",
"feeds.content.dowjones.io",
"github.blog",
"githubnext.com",
"mshibanami.github.io",
"www.engadget.com",
"news.mit.edu",
"dev.events",
"www.ycombinator.com",
"a16z.com",
"review.firstround.com",
"www.sequoiacap.com",
"www.nfx.com",
"www.aaronsw.com",
"bothsidesofthetable.com",
"www.lennysnewsletter.com",
"stratechery.com",
"www.eu-startups.com",
"tech.eu",
"sifted.eu",
"www.techinasia.com",
"kr-asia.com",
"techcabal.com",
"disrupt-africa.com",
"lavca.org",
"contxto.com",
"inc42.com",
"yourstory.com",
"pitchbook.com",
"www.cbinsights.com",
"www.techstars.com",
"english.alarabiya.net",
"www.arabnews.com",
"www.timesofisrael.com",
"www.haaretz.com",
"www.scmp.com",
"kyivindependent.com",
"www.themoscowtimes.com",
"feeds.24.com",
"feeds.capi24.com",
"www.france24.com",
"www.euronews.com",
"www.lemonde.fr",
"rss.dw.com",
"www.africanews.com",
"www.lasillavacia.com",
"www.channelnewsasia.com",
"www.thehindu.com",
"news.un.org",
"www.iaea.org",
"www.who.int",
"www.cisa.gov",
"www.crisisgroup.org",
"rusi.org",
"warontherocks.com",
"www.aei.org",
"responsiblestatecraft.org",
"www.fpri.org",
"jamestown.org",
"www.chathamhouse.org",
"ecfr.eu",
"www.gmfus.org",
"www.wilsoncenter.org",
"www.lowyinstitute.org",
"www.mei.edu",
"www.stimson.org",
"www.cnas.org",
"carnegieendowment.org",
"www.rand.org",
"fas.org",
"www.armscontrol.org",
"www.nti.org",
"thebulletin.org",
"www.iss.europa.eu",
"www.fao.org",
"worldbank.org",
"www.imf.org",
"www.hurriyet.com.tr",
"tvn24.pl",
"www.polsatnews.pl",
"www.rp.pl",
"meduza.io",
"novayagazeta.eu",
"www.bangkokpost.com",
"vnexpress.net",
"www.abc.net.au",
"news.ycombinator.com",
"www.coindesk.com",
"cointelegraph.com",
"www.goodnewsnetwork.org",
"www.positive.news",
"reasonstobecheerful.world",
"www.optimistdaily.com",
"www.sunnyskyz.com",
"www.huffpost.com",
"www.sciencedaily.com",
"feeds.nature.com",
"www.livescience.com",
"www.newscientist.com",
] as const;
/**
 * Lookup set of allowlisted hosts, holding the `normalizeDomain` form of
 * every entry in `RAW_ALLOWED_DOMAINS`. Entries that normalize to the same
 * string collapse into a single member.
 */
export const ALLOWED_SOURCE_DOMAINS = new Set(
  Array.from(RAW_ALLOWED_DOMAINS, (rawEntry) => normalizeDomain(rawEntry)),
);
/**
 * Reduces a host-like string to a bare, comparable domain name.
 *
 * Steps, in order: trim surrounding whitespace, lowercase, drop a leading
 * `http://` or `https://`, drop a leading `www.`, cut everything from the
 * first `/`, `?` or `#`, drop a trailing `:port`, and drop trailing dots
 * (the fully-qualified form `example.com.`).
 *
 * Cutting at `?` and `#` as well as `/` matters for safety: otherwise an
 * input such as `evil.com?x=.hnrss.org` keeps its tail and looks like a
 * subdomain of an allowlisted host to suffix-based checks.
 *
 * @param domain - A hostname, `host:port`, or an http(s) URL.
 * @returns The normalized host, or an empty string when nothing is left.
 *
 * @example
 * normalizeDomain("HTTPS://WWW.Example.com:8443/feed?x=1"); // "example.com"
 */
export function normalizeDomain(domain: string): string {
  // The regex literals are kept inline on purpose: this function is invoked
  // while the module is still initialising (to build the allowlist set), so
  // it must not depend on module-level constants declared after that point.
  return (
    domain
      .trim()
      .toLowerCase()
      .replace(/^https?:\/\//, "")
      .replace(/^www\./, "")
      // [\s\S] rather than `.` so an embedded newline cannot stop the cut.
      .replace(/[\/?#][\s\S]*/, "")
      // Strip a port only from `name:port` or `[ipv6]:port`; a bare IPv6
      // literal contains several colons and is left untouched.
      .replace(/^(\[[^\]]*\]|[^:]+):\d+$/, "$1")
      .replace(/\.+$/, "")
  );
}
/**
 * Reports whether a host is on the allowlist, either as an exact entry or as
 * a subdomain of one.
 *
 * The input is first passed through `normalizeDomain`. Rather than scanning
 * every allowlist entry, the check walks the input's own parent domains
 * (the text after each `.`) and looks each one up in the set, so the cost
 * depends on the number of labels in the input instead of the allowlist size.
 * The result is the same as testing `endsWith("." + entry)` for every entry.
 *
 * @param domain - Hostname (or host-like string) to test.
 * @returns `true` when the normalized host equals an allowlisted domain or
 *   ends with `.` followed by one.
 */
export function isAllowedDomain(domain: string): boolean {
  const host = normalizeDomain(domain);
  if (ALLOWED_SOURCE_DOMAINS.has(host)) {
    return true;
  }

  // Each dot marks the start of a parent domain: a.b.c -> b.c -> c.
  for (
    let dot = host.indexOf(".");
    dot !== -1;
    dot = host.indexOf(".", dot + 1)
  ) {
    if (ALLOWED_SOURCE_DOMAINS.has(host.slice(dot + 1))) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether a URL is an http(s) URL whose host is on the allowlist.
 *
 * The string is parsed with the `URL` constructor, so the port, path, query,
 * fragment and credentials never reach the host check. Non-HTTP(S) schemes
 * are rejected outright: URLs such as `javascript://hnrss.org/...` or
 * `file://hnrss.org/...` parse with an allowlisted hostname but are not web
 * sources.
 *
 * @param url - Absolute URL to validate.
 * @returns `true` only for a parseable `http:`/`https:` URL with an allowed
 *   host; `false` for anything else, including strings that fail to parse.
 */
export function isAllowedUrl(url: string): boolean {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // Relative or malformed input has no host to check.
    return false;
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    return false;
  }
  return isAllowedDomain(parsed.hostname);
}