# robots.txt - crawler access policy.
#
# One record per line: a "#" starts a comment that runs to the end of the
# line, so directives must never share a line with a preceding comment.
# No blank lines are used inside a group, because some older parsers treat
# a blank line as the end of the group.
#
# NOTE(review): a crawler that matches one of the named groups below follows
# ONLY that group and ignores the "*" group. The default-only paths
# (/danke, /search/, /lp/, /internal-site-monitoring/, ...) are therefore
# not blocked for those bots - confirm this is intended.

# ----------------------------
# 1) Default rules (all bots)
# ----------------------------
User-agent: *
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /danke
Disallow: /danke/
Disallow: /?s=
Disallow: /search/
Disallow: /dank
Disallow: /dank/
Disallow: /thanks
Disallow: /thanks/
Disallow: /business-club-2021/
Disallow: /speaker-vortraege-xchange-2021/
Disallow: /lp/
Disallow: /internal-site-monitoring/
# WP Import Export Rule
Disallow: /wp-content/uploads/wp-import-export-lite/
# Allow LLM guidance file(s)
Allow: /llms.txt
Allow: /llms-full.txt

# The Sitemap record is not tied to any user-agent group.
Sitemap: https://www.onventis.de/wpms-sitemap.xml

# -----------------------------------------------------
# 2) OpenAI bots (training + search + user-triggered fetch)
# -----------------------------------------------------

# GPTBot = training crawler
User-agent: GPTBot
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-content/uploads/wp-import-export-lite/

# OAI-SearchBot = ChatGPT search/indexing crawler
User-agent: OAI-SearchBot
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-content/uploads/wp-import-export-lite/

# ChatGPT-User = user-triggered fetches
User-agent: ChatGPT-User
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-content/uploads/wp-import-export-lite/

# -----------------------------------------------------
# 3) Anthropic bots (training + search + user-triggered fetch)
# -----------------------------------------------------

# ClaudeBot = training crawler
User-agent: ClaudeBot
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-content/uploads/wp-import-export-lite/

# Claude-SearchBot = search/indexing crawler
User-agent: Claude-SearchBot
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-content/uploads/wp-import-export-lite/

# Claude-User = user-triggered fetches
User-agent: Claude-User
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-content/uploads/wp-import-export-lite/

# -----------------------------------------------------
# 4) Perplexity bots (search + user-triggered fetch)
# -----------------------------------------------------

# PerplexityBot = search/indexing crawler
User-agent: PerplexityBot
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-content/uploads/wp-import-export-lite/

# Note: Perplexity-User is documented as user-triggered and may ignore robots.txt,
# so if it ever needs to be restricted, WAF/IP controls will typically be required.
# (Leaving it allowed matches the stated intent to allow browsing/training.)
User-agent: Perplexity-User
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-content/uploads/wp-import-export-lite/

# -----------------------------------------------------
# 5) Common Crawl (often used in training datasets)
# -----------------------------------------------------
User-agent: CCBot
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-content/uploads/wp-import-export-lite/

# -----------------------------------------------------
# 6) Google Extended (controls AI training usage for Google systems)
# -----------------------------------------------------
User-agent: Google-Extended
Allow: /
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-content/uploads/wp-import-export-lite/