TeXray / robots.txt
openfree's picture
Upload 3 files
8132da7 verified
# Ginigen AI & K-AI Community - Global Robots.txt
# Used by: TeXray, VIDraft, FINAL Bench, AETHER, and all future projects
# Last Updated: 2026.03.01
# Fallback rules: these apply ONLY to crawlers that match no named User-agent
# group elsewhere in this file. Per RFC 9309 a crawler obeys its most specific
# matching group and does not merge it with the "*" group.
User-agent: *
Allow: /
# Redundant with "Allow: /" above; kept to call out llms.txt explicitly.
Allow: /llms.txt
# Keep administrative, scratch, and secret-bearing paths out of crawls.
# NOTE: robots.txt is advisory and publicly readable - it is not access
# control. These paths must also be protected server-side.
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
# Non-standard directive: honored by some crawlers, ignored by others
# (Google does not support it).
Crawl-delay: 1
# Sitemap declarations (file-wide; not tied to any User-agent group).
# TODO: "example.com" is a placeholder - replace with the production domain
# before deploying.
Sitemap: https://example.com/sitemap.xml
Sitemap: https://example.com/sitemap-projects.xml
# ============================================
# AI Crawlers - Actively Welcome
# ============================================
# OpenAI GPTBot - OpenAI GPT training crawler
# A named group replaces the "*" group for this crawler (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: GPTBot
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# OpenAI ChatGPT-User - fetches pages on demand when a ChatGPT user's request
# triggers a visit (not specific to the mobile app, and not a training crawler)
# A named group replaces the "*" group for this agent (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: ChatGPT-User
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# Google-Extended - control token governing whether content crawled by Google
# may be used for Gemini model training and grounding. It is not a separate
# crawler and does not control AI Overviews in Search (those follow Googlebot).
# A named group replaces the "*" group for this token (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: Google-Extended
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# Anthropic (Claude)
# Named groups replace the "*" group for these crawlers (RFC 9309), so the
# sensitive-path exclusions are repeated in each group.
# Anthropic-ai: older Anthropic crawler token, kept for compatibility.
User-agent: Anthropic-ai
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# ClaudeBot: Anthropic's web crawler (collects content that may be used for
# model training).
User-agent: ClaudeBot
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# Perplexity AI - Perplexity AI search crawler
# A named group replaces the "*" group for this crawler (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: PerplexityBot
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# Apple Intelligence - Applebot-Extended (Apple AI training opt-in/opt-out token)
# A named group replaces the "*" group for this token (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: Applebot-Extended
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# Cohere - Cohere AI training crawler
# A named group replaces the "*" group for this crawler (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: cohere-ai
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# Meta
# meta-externalagent: Meta's crawler for AI model training / indexing.
# facebookexternalhit: link-preview fetcher used when URLs are shared on Meta
# apps (not a training crawler); kept so shared links keep rendering previews.
# A named group replaces the "*" group for these agents (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: meta-externalagent
User-agent: facebookexternalhit
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# Mistral AI - Mistral AI training crawler
# NOTE(review): confirm "MistralBot" is the token Mistral's crawler actually
# sends; an unmatched token makes this group a no-op.
# A named group replaces the "*" group for this crawler (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: MistralBot
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# Hugging Face (Hub & Model Training) - HuggingFace model discovery
# NOTE(review): confirm "HuggingFaceBot" is a token a real crawler sends; an
# unmatched token makes this group a no-op.
# A named group replaces the "*" group for this crawler (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: HuggingFaceBot
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# ============================================
# Search Engines
# ============================================
# Google - Google Search indexing
# A named group replaces the "*" group for this crawler (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: Googlebot
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
# Googlebot does not support Crawl-delay; the line is ignored by Google.
Crawl-delay: 0
# Bing - Bing Search indexing
# A named group replaces the "*" group for this crawler (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: Bingbot
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 1
# DuckDuckGo - DuckDuckGo Search indexing
# A named group replaces the "*" group for this crawler (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: DuckDuckBot
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 1
# ============================================
# Blocked / Rate-Limited Bots
# ============================================
# Aggressive/spam bots - blocked: aggressive crawlers
# One shared group: all four user agents are denied the entire site.
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: DotBot
User-agent: MJ12bot
Disallow: /
# Bad actors - blocked: known malicious crawlers
# NOTE(review): "BadBot" and "SpoofBot" look like placeholder names - replace
# with the real user-agent tokens to block. Malicious crawlers typically ignore
# robots.txt, so enforce these blocks at the server/WAF as well.
User-agent: BadBot
User-agent: SpoofBot
Disallow: /
# ============================================
# Analytics & Monitoring (Special Rules)
# ============================================
# HuggingFace Analytics
# A named group replaces the "*" group for this agent (RFC 9309), so the
# sensitive-path exclusions are repeated here.
User-agent: HFAnalytics
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# Custom monitoring
# NOTE(review): token is spelled "GiniginAI" while the file header says
# "Ginigen AI" - confirm it matches the User-Agent the monitor actually sends.
# A named group replaces the "*" group for this agent (RFC 9309), so the
# sensitive-path exclusions are repeated here. Remove any the monitor must probe.
User-agent: GiniginAI-Monitor
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /.git/
Disallow: /.env
Crawl-delay: 0
# ============================================
# Notes for Future Implementation
# ============================================
# 1. Replace "https://example.com" with actual domain
# 2. Add project-specific sitemaps as needed
# 3. Maintain AI crawler allowlist for optimal discoverability
# 4. Keep crawl-delay at 0 for important AI crawlers
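# 5. Review and update quarterly as new AI services emerge
# 6. A crawler matched by a named User-agent group ignores the "*" group, so
#    repeat any Disallow rules that must apply to it inside its own group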