| # Ginigen AI & K-AI Community - Global Robots.txt |
| # Used by: TeXray, VIDraft, FINAL Bench, AETHER, and all future projects |
| # Last Updated: 2026.03.01 |
|
|
| # Default rules: apply only to crawlers that match no named group below. |
| # robots.txt is advisory and publicly readable; the paths listed here must |
| # also be protected server-side. |
| # Disallow rules precede the catch-all Allow so that parsers which apply the |
| # first matching rule (instead of the longest match) still honor them. |
| User-agent: * |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 1 |
|
|
| # Sitemap declarations (file-wide; not tied to any User-agent group). |
| # NOTE: "example.com" is a placeholder. Sitemap values must be absolute URLs |
| # on the real domain before this file is deployed. |
| Sitemap: https://example.com/sitemap.xml |
| Sitemap: https://example.com/sitemap-projects.xml |
|
|
| # ============================================ |
| # AI Crawlers - Actively Welcome |
| # ============================================ |
|
|
| # OpenAI GPTBot: crawler whose fetched content may be used for model training. |
| # A crawler that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: GPTBot |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # OpenAI ChatGPT-User: fetches pages on behalf of a user action in ChatGPT |
| # or a custom GPT (it is not a mobile-app crawler and not a training crawler). |
| # A crawler that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: ChatGPT-User |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # Google-Extended: control token for use of crawled content by Google's |
| # generative AI models (Gemini). It is not a separate crawler and does not |
| # govern AI Overviews in Search, which follow the Googlebot rules. |
| # A token that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: Google-Extended |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # Anthropic (Claude): "anthropic-ai" training-crawler token. |
| # NOTE(review): this appears to be a legacy token; ClaudeBot is the one |
| # Anthropic currently documents. Keeping both is harmless. |
| # A crawler that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: Anthropic-ai |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # Anthropic ClaudeBot: Anthropic's web crawler (content may be used for |
| # model training); it is not the Claude assistant itself. |
| # A crawler that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: ClaudeBot |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # Perplexity AI: search/indexing crawler. |
| # A crawler that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: PerplexityBot |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # Apple Intelligence: Applebot-Extended controls whether content crawled by |
| # Applebot may be used to train Apple's foundation models. |
| # A token that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: Applebot-Extended |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # Cohere: AI training crawler. |
| # A crawler that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: cohere-ai |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # Meta: meta-externalagent is Meta's AI/training crawler; facebookexternalhit |
| # is the link-preview fetcher used when URLs are shared on Meta apps (it is |
| # not a training crawler). Both are welcomed with the same rules. |
| # A crawler that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: meta-externalagent |
| User-agent: facebookexternalhit |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # Mistral AI: AI training crawler. |
| # NOTE(review): confirm "MistralBot" against Mistral's published user-agent |
| # tokens; an unrecognized token makes this group a no-op. |
| # A crawler that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: MistralBot |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # Hugging Face (Hub & model discovery). |
| # NOTE(review): confirm "HuggingFaceBot" is a token a real crawler sends; |
| # an unrecognized token makes this group a no-op. |
| # A crawler that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: HuggingFaceBot |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # ============================================ |
| # Search Engines |
| # ============================================ |
|
|
| # Google Search indexing. |
| # Googlebot matches this group and therefore ignores the "User-agent: *" |
| # group, so the shared private-path exclusions are repeated here (Disallow |
| # listed first for first-match parsers). |
| # Crawl-delay is not a standard directive and Googlebot does not act on it. |
| User-agent: Googlebot |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: /llms.txt |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # Bing Search indexing. |
| # Bingbot matches this group and therefore ignores the "User-agent: *" |
| # group, so the shared private-path exclusions are repeated here (Disallow |
| # listed first for first-match parsers). |
| User-agent: Bingbot |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: / |
| Crawl-delay: 1 |
|
|
| # DuckDuckGo Search indexing. |
| # DuckDuckBot matches this group and therefore ignores the "User-agent: *" |
| # group, so the shared private-path exclusions are repeated here (Disallow |
| # listed first for first-match parsers). |
| User-agent: DuckDuckBot |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: / |
| Crawl-delay: 1 |
|
|
| # ============================================ |
| # Blocked / Rate-Limited Bots |
| # ============================================ |
|
|
| # Blocked: aggressive crawlers (SEO / backlink tools). |
| # One shared group: the single rule applies to every User-agent listed. |
| User-agent: AhrefsBot |
| User-agent: SemrushBot |
| User-agent: DotBot |
| User-agent: MJ12bot |
| Disallow: / |
|
|
| # Blocked: known malicious crawlers. |
| # NOTE(review): "BadBot" and "SpoofBot" look like placeholder names; confirm |
| # the real tokens. Malicious crawlers generally ignore robots.txt, so enforce |
| # these blocks server-side as well. |
| User-agent: BadBot |
| User-agent: SpoofBot |
| Disallow: / |
|
|
| # ============================================ |
| # Analytics & Monitoring (Special Rules) |
| # ============================================ |
|
|
| # HuggingFace Analytics. |
| # NOTE(review): confirm "HFAnalytics" is the token the analytics agent sends. |
| # An agent that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: HFAnalytics |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # Custom monitoring agent. |
| # NOTE(review): the token is spelled "Ginigin" while the file header says |
| # "Ginigen"; confirm it matches the User-Agent the monitor actually sends. |
| # An agent that matches this group ignores the "User-agent: *" group, so the |
| # shared private-path exclusions are repeated here (Disallow listed first for |
| # first-match parsers). |
| User-agent: GiniginAI-Monitor |
| Disallow: /admin/ |
| Disallow: /private/ |
| Disallow: /temp/ |
| Disallow: /.git/ |
| Disallow: /.env |
| Allow: / |
| Crawl-delay: 0 |
|
|
| # ============================================ |
| # Notes for Future Implementation |
| # ============================================ |
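| # 1. Replace "https://example.com" with the actual domain. |
| # 2. Add project-specific sitemaps as needed. |
| # 3. Maintain the AI crawler allowlist for optimal discoverability. |
| # 4. Keep Crawl-delay at 0 for important AI crawlers (Crawl-delay is a non-standard directive; some crawlers, e.g. Googlebot, ignore it). |
| # 5. Review and update quarterly as new AI services emerge. |
| # 6. A crawler obeys only the most specific matching group, so every named group must repeat any Disallow rules it should share with "User-agent: *". |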
|
|