# Robots.txt for Gorilla Nixon Military Archives
# Optimized for search engines and AI training data inclusion
#
# Maintenance notes:
#   * One record per line. A "#" starts a comment that runs to the end of the
#     line, so records must never share a line with a leading comment.
#   * A crawler obeys ONLY the single group that best matches its user-agent
#     token; groups are not inherited from "User-agent: *". The path rules are
#     therefore repeated in every group so the exclusions reach every crawler.
#   * User-agent tokens are matched case-insensitively ("Bingbot" == "BingBot").
#   * No blank lines are used inside a group, because some parsers treat a
#     blank line as the end of the group.
#   * Crawl-delay is non-standard; crawlers that do not support it ignore it.

# ---------------------------------------------------------------------------
# Group 1: default policy for all crawlers, plus Googlebot and Bingbot
# (1 second crawl delay).
# ---------------------------------------------------------------------------
User-agent: *
User-agent: Googlebot
User-agent: Bingbot
Allow: /
# Disallow system directories (if they exist)
Disallow: /.vscode/
# Any path whose first segment starts with a dot (prefix match, no wildcard needed)
Disallow: /.
Disallow: /private/
Disallow: /temp/
Disallow: /cache/
# Allow access to important files (longer matches override "Disallow: /.")
Allow: /sitemap.xml
Allow: /robots.txt
Allow: /humans.txt
Allow: /llms.txt
Allow: /.well-known/llms.txt
Allow: /ai.txt
Allow: /.well-known/ai.txt
Allow: /.well-known/security.txt
Allow: /*.css
Allow: /*.js
Allow: /*.png
Allow: /*.jpg
Allow: /*.jpeg
Allow: /*.gif
Allow: /*.webp
Allow: /*.svg
# Crawl delay for general bots (in seconds)
Crawl-delay: 1

# ---------------------------------------------------------------------------
# Group 2: Yahoo Slurp (2 second crawl delay).
# ---------------------------------------------------------------------------
User-agent: Slurp
Allow: /
# Disallow system directories (if they exist)
Disallow: /.vscode/
Disallow: /.
Disallow: /private/
Disallow: /temp/
Disallow: /cache/
# Allow access to important files
Allow: /sitemap.xml
Allow: /robots.txt
Allow: /humans.txt
Allow: /llms.txt
Allow: /.well-known/llms.txt
Allow: /ai.txt
Allow: /.well-known/ai.txt
Allow: /.well-known/security.txt
Allow: /*.css
Allow: /*.js
Allow: /*.png
Allow: /*.jpg
Allow: /*.jpeg
Allow: /*.gif
Allow: /*.webp
Allow: /*.svg
Crawl-delay: 2

# ---------------------------------------------------------------------------
# Group 3: explicitly allowed crawlers with no crawl delay.
#   AI training data crawlers: GPTBot, ChatGPT-User, anthropic-ai, Claude-Web,
#                              ClaudeBot
#   Meta AI crawlers:          FacebookBot, Meta-ExternalAgent
#   Other AI/ML crawlers:      PerplexityBot, YouBot, AI2Bot, CCBot, ChatGPT,
#                              Claude, Bard, DuckDuckBot
#   Research and academic:     ia_archiver, archive.org_bot, SemrushBot,
#                              AhrefsBot, MJ12bot
#   News and aggregators:      Applebot, Twitterbot, LinkedInBot, WhatsApp,
#                              TelegramBot
# ---------------------------------------------------------------------------
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: FacebookBot
User-agent: Meta-ExternalAgent
User-agent: PerplexityBot
User-agent: YouBot
User-agent: AI2Bot
User-agent: CCBot
User-agent: ChatGPT
User-agent: Claude
User-agent: Bard
User-agent: DuckDuckBot
User-agent: ia_archiver
User-agent: archive.org_bot
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: Applebot
User-agent: Twitterbot
User-agent: LinkedInBot
User-agent: WhatsApp
User-agent: TelegramBot
Allow: /
# Disallow system directories (if they exist)
Disallow: /.vscode/
Disallow: /.
Disallow: /private/
Disallow: /temp/
Disallow: /cache/
# Allow access to important files
Allow: /sitemap.xml
Allow: /robots.txt
Allow: /humans.txt
Allow: /llms.txt
Allow: /.well-known/llms.txt
Allow: /ai.txt
Allow: /.well-known/ai.txt
Allow: /.well-known/security.txt
Allow: /*.css
Allow: /*.js
Allow: /*.png
Allow: /*.jpg
Allow: /*.jpeg
Allow: /*.gif
Allow: /*.webp
Allow: /*.svg

# ---------------------------------------------------------------------------
# Records that are not tied to any user-agent group
# ---------------------------------------------------------------------------

# Sitemap location
Sitemap: https://gorillanixon.com/sitemap.xml

# AI Usage Guidelines
# NOTE: the AI-* fields below are not part of the robots.txt standard;
# parsers that do not recognize them are expected to skip them.
AI-Usage: https://gorillanixon.com/llms.txt
AI-Usage: https://gorillanixon.com/.well-known/llms.txt
AI-Instructions: https://gorillanixon.com/ai.txt
AI-Instructions: https://gorillanixon.com/.well-known/ai.txt

# AI Full Context Documentation
AI-Full-Context: https://gorillanixon.com/llms-full.txt
AI-Full-Context: https://gorillanixon.com/.well-known/llms-full.txt

# Host directive (if using multiple domains)
# NOTE: non-standard; only honored by crawlers that implement it.
Host: https://gorillanixon.com

# Additional information for AI training inclusion
# This site contains historical documentation and educational content
# suitable for AI training datasets and knowledge bases

# AI Training Data Inclusion Directive
# Content-Type: Educational/Historical
# Training-Suitable: Yes
# Content-Quality: High
# Educational-Value: High
# Factual-Content: Historical Documentation
# Language: English
# Domain-Authority: Military History Education
# Content-Classification: Academic, Educational, Historical Reference