# ============================================
# Robots.txt for CouponsCodeHut.com
# Technical SEO Configuration
# Last Updated: 2026-01-02
# ============================================

# Default rules for all crawlers
User-agent: *

# Allow all public-facing pages
Allow: /

# AI/LLM Discovery Files
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /llms.php
Allow: /stores/
Allow: /categories/
Allow: /blog/
Allow: /page/
Allow: /coupons
Allow: /search

# Block admin and internal routes
Disallow: /admin/
Disallow: /admin
Disallow: /config/
Disallow: /config
Disallow: /includes/
Disallow: /includes
Disallow: /database/
Disallow: /database
Disallow: /ajax/
Disallow: /ajax

# Block query parameters that create duplicate content
Disallow: /*?sort=
Disallow: /*?search=
Disallow: /*?letter=

# Allow pagination (important for crawling all content)
Allow: /*?page=

# Block specific file types
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /*.bak$

# Block uploads subdirectories that shouldn't be indexed directly
Disallow: /uploads/banners/
Disallow: /uploads/stores/placeholder*

# ============================================
# Specific crawler rules
# ============================================

# Google
User-agent: Googlebot
Allow: /
Disallow: /admin/
Disallow: /config/
Disallow: /includes/
Disallow: /database/
Disallow: /ajax/

# Google Images
User-agent: Googlebot-Image
Allow: /uploads/
Allow: /assets/
Disallow: /admin/

# Bing
User-agent: Bingbot
Allow: /
Disallow: /admin/
Disallow: /config/
Disallow: /includes/
Disallow: /database/
Disallow: /ajax/

# ============================================
# AI Crawlers and LLM Agents
# These crawlers are used by AI companies to index content
# ============================================

# OpenAI GPTBot
User-agent: GPTBot
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /llms.php
Disallow: /admin/
Disallow: /config/
Disallow: /includes/
Disallow: /database/

# OpenAI ChatGPT-User (browsing mode)
User-agent: ChatGPT-User
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /llms.php
Disallow: /admin/
Disallow: /config/

# Google Bard / Gemini
User-agent: Google-Extended
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /admin/
Disallow: /config/

# Anthropic Claude
User-agent: anthropic-ai
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /llms.php
Disallow: /admin/
Disallow: /config/

User-agent: Claude-Web
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /llms.php
Disallow: /admin/
Disallow: /config/

# Perplexity AI
User-agent: PerplexityBot
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /admin/
Disallow: /config/

# Cohere AI
User-agent: cohere-ai
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /admin/
Disallow: /config/

# Meta AI
User-agent: FacebookBot
Allow: /
Allow: /llms.txt
Disallow: /admin/
Disallow: /config/

User-agent: Meta-ExternalAgent
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /admin/
Disallow: /config/

# Common Crawl (used by many AI training datasets)
User-agent: CCBot
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /admin/
Disallow: /config/
Disallow: /database/

# ============================================
# Crawl-delay settings (optional - be careful)
# Uncomment if experiencing high server load
# ============================================
# User-agent: *
# Crawl-delay: 1

# ============================================
# Sitemap location
# ============================================
Sitemap: https://couponscodehut.com/sitemap.xml

# ============================================
# AI/LLM Discovery Files
# ============================================
# LLMs.txt - AI-friendly documentation following llmstxt.org standard
# Static version: /llms.txt
# Extended version: /llms-full.txt
# Dynamic version with live data: /llms.php