From 1992e591db3a94c8c75e62f29c932ba839fbc8bb Mon Sep 17 00:00:00 2001
From: madrilene
Date: Fri, 27 Jun 2025 13:19:26 +0200
Subject: [PATCH] update robots.txt to include ai-robots-txt user-agent directives

---
 src/common/robots.njk | 100 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 86 insertions(+), 14 deletions(-)

diff --git a/src/common/robots.njk b/src/common/robots.njk
index d9a48e0..cbdbf7e 100644
--- a/src/common/robots.njk
+++ b/src/common/robots.njk
@@ -3,19 +3,91 @@ permalink: /robots.txt
 eleventyExcludeFromCollections: true
 excludeFromSitemap: true
 ---
-User-agent: *
-Disallow: /404.html
-
-User-agent: GPTbot
-Disallow: /
-
-User-agent: ChatGPT-User
-Disallow: /
-
-User-agent: Google-Extended
-Disallow: /
-
-User-agent: Omgilibot
-Disallow: /
+User-agent: *
+Disallow: /404.html
 
 Sitemap: {{ meta.url }}/sitemap.xml
+
+# https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.txt
+
+User-agent: AI2Bot
+User-agent: Ai2Bot-Dolma
+User-agent: aiHitBot
+User-agent: Amazonbot
+User-agent: Andibot
+User-agent: anthropic-ai
+User-agent: Applebot
+User-agent: Applebot-Extended
+User-agent: bedrockbot
+User-agent: Brightbot 1.0
+User-agent: Bytespider
+User-agent: CCBot
+User-agent: ChatGPT-User
+User-agent: Claude-SearchBot
+User-agent: Claude-User
+User-agent: Claude-Web
+User-agent: ClaudeBot
+User-agent: cohere-ai
+User-agent: cohere-training-data-crawler
+User-agent: Cotoyogi
+User-agent: Crawlspace
+User-agent: Diffbot
+User-agent: DuckAssistBot
+User-agent: EchoboxBot
+User-agent: FacebookBot
+User-agent: facebookexternalhit
+User-agent: Factset_spyderbot
+User-agent: FirecrawlAgent
+User-agent: FriendlyCrawler
+User-agent: Google-CloudVertexBot
+User-agent: Google-Extended
+User-agent: GoogleOther
+User-agent: GoogleOther-Image
+User-agent: GoogleOther-Video
+User-agent: GPTBot
+User-agent: iaskspider/2.0
+User-agent: ICC-Crawler
+User-agent: ImagesiftBot
+User-agent: img2dataset
+User-agent: ISSCyberRiskCrawler
+User-agent: Kangaroo Bot
+User-agent: meta-externalagent
+User-agent: Meta-ExternalAgent
+User-agent: meta-externalfetcher
+User-agent: Meta-ExternalFetcher
+User-agent: MistralAI-User/1.0
+User-agent: MyCentralAIScraperBot
+User-agent: NovaAct
+User-agent: OAI-SearchBot
+User-agent: omgili
+User-agent: omgilibot
+User-agent: Operator
+User-agent: PanguBot
+User-agent: Panscient
+User-agent: panscient.com
+User-agent: Perplexity-User
+User-agent: PerplexityBot
+User-agent: PetalBot
+User-agent: PhindBot
+User-agent: Poseidon Research Crawler
+User-agent: QualifiedBot
+User-agent: QuillBot
+User-agent: quillbot.com
+User-agent: SBIntuitionsBot
+User-agent: Scrapy
+User-agent: SemrushBot
+User-agent: SemrushBot-BA
+User-agent: SemrushBot-CT
+User-agent: SemrushBot-OCOB
+User-agent: SemrushBot-SI
+User-agent: SemrushBot-SWA
+User-agent: Sidetrade indexer bot
+User-agent: TikTokSpider
+User-agent: Timpibot
+User-agent: VelenPublicWebCrawler
+User-agent: Webzio-Extended
+User-agent: wpbot
+User-agent: YandexAdditional
+User-agent: YandexAdditionalBot
+User-agent: YouBot
+Disallow: /
\ No newline at end of file