# ──────────────────────────────────────────────────────────
# AI Crawler Policy · Agent-Ready Template (by twin3)
# ──────────────────────────────────────────────────────────
# 用途:複製此檔案到網站根目錄 /robots.txt
#       (若已有 robots.txt,合併規則即可)
#
# 政策設計:
#   ✓ 允許「答案引擎」型爬蟲(會引用回連、帶流量)
#   ✗ 封鎖「純訓練」型爬蟲(把內容拿去訓練模型,不回連)
#   ✓ 允許用戶端 agent(用戶問問題時 on-demand 抓你)
#
# 請依貴公司內容授權策略調整 Allow / Disallow。
# 最後更新:2026-05 · 取得最新版:https://twin3.ai
# ──────────────────────────────────────────────────────────


# ═══════ Allowed: answer engines (they cite and link back, bringing traffic) ═══════
# One group: every User-agent line below shares the single Allow rule at the end.
# Do not insert blank lines inside the group; some parsers treat them as separators.

User-agent: Googlebot
User-agent: GoogleOther
# OpenAI search index (used by ChatGPT Search / SearchGPT)
User-agent: OAI-SearchBot
# ChatGPT on-demand fetch (answers the user's current question)
User-agent: ChatGPT-User
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Claude-SearchBot
# Claude on-demand fetch on behalf of a user
User-agent: Claude-User
Allow: /


# ═══════ Blocked: training-only crawlers (take content, no link back) ═══════
# One group: every User-agent line below shares the single Disallow rule at the end.
# Do not insert blank lines inside the group; some parsers treat them as separators.

# OpenAI training crawler
User-agent: GPTBot
# Anthropic training crawler
User-agent: ClaudeBot
User-agent: anthropic-ai
# Google Gemini / Bard training control (does not affect Search)
User-agent: Google-Extended
# Common Crawl (its corpus is used for training by many companies)
User-agent: CCBot
# ByteDance / Doubao
User-agent: Bytespider
# Meta AI crawler
User-agent: Meta-ExternalAgent
User-agent: cohere-ai
Disallow: /

# Meta-ExternalFetcher is NOT a training crawler: Meta documents it as a
# user-initiated fetcher for individual links. It therefore falls under this
# template's "allow on-demand user agents" policy and is allowed explicitly.
# Change this to "Disallow: /" if you want to opt out of Meta entirely.
User-agent: Meta-ExternalFetcher
Allow: /


# ═══════ Sitemap / License ═══════

# Replace the YOUR-DOMAIN.COM placeholder with the site's real host before deploying.
Sitemap: https://YOUR-DOMAIN.COM/sitemap.xml

# If you have published an RSL license declaration, add (uncomment) the line
# below so AI companies know that training use requires payment:
# License: https://YOUR-DOMAIN.COM/.well-known/rsl.xml