웹사이트 robots.txt 예시
출처: https://www.rytongolfclub.co.uk/robots.txt
# Default exclusions
User-agent: *
Disallow: /assets/cache/
Disallow: /assets/docs/
Disallow: /assets/export/
Disallow: /assets/fileud/
Disallow: /assets/import/
Disallow: /assets/modules/
Disallow: /assets/plugins/
Disallow: /assets/snippets/
Disallow: /assets/packages/
Disallow: /assets/tvs/
Disallow: /cgi-bin/
Disallow: /code/
Disallow: /forum/
Disallow: /install/
Disallow: /manager/
Allow: /assets/snippets/dbcalx/
Allow: /assets/snippets/maxigallery/css/
# For sitemap
Sitemap: https://www.rytongolfclub.co.uk/sitemap
# Block MJ12bot as it is just noise
User-agent: MJ12bot
Disallow: /
# Block Ahrefs
User-agent: AhrefsBot
Disallow: /
# Block Sogou
User-agent: sogou spider
Disallow: /
# Block SEOkicks
User-agent: SEOkicks-Robot
Disallow: /
# Block BlexBot
User-agent: BLEXBot
Disallow: /
# Block SISTRIX
User-agent: SISTRIX Crawler
Disallow: /
# Block Uptime robot and the 008 (80legs) crawler
User-agent: UptimeRobot/2.0
Disallow: /
User-agent: 008
Disallow: /
# Block Ezooms Robot
User-agent: Ezooms Robot
Disallow: /
# Block Perl LWP
User-agent: Perl LWP
Disallow: /
# Block BlexBot
User-agent: BLEXBot
Disallow: /
# Block netEstate NE Crawler (+http://www.website-datenbank.de/)
User-agent: netEstate NE Crawler (+http://www.website-datenbank.de/)
Disallow: /
# Block WiseGuys Robot
User-agent: WiseGuys Robot
Disallow: /
# Block Turnitin Robot
User-agent: Turnitin Robot
Disallow: /
User-agent: TurnitinBot
Disallow: /
User-agent: Turnitin Bot
Disallow: /
User-agent: TurnitinBot/3.0 (http://www.turnitin.com/robot/crawlerinfo.html)
Disallow: /
User-agent: TurnitinBot/3.0
Disallow: /
# Block Heritrix
User-agent: Heritrix
Disallow: /
# Block pricepi
User-agent: pimonster
Disallow: /
User-agent: Pimonster
Disallow: /
# Block Searchmetrics Bot
User-agent: SearchmetricsBot
Disallow: /
# Block Eniro
User-agent: ECCP/1.0 (search@eniro.com)
Disallow: /
# Block YandexBot
User-agent: Yandex
Disallow: /
# Block Baidu
User-agent: Baiduspider
User-agent: Baiduspider-video
User-agent: Baiduspider-image
User-agent: Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)
User-agent: Mozilla/5.0 (compatible; Baiduspider/3.0; +http://www.baidu.com/search/spider.html)
User-agent: Mozilla/5.0 (compatible; Baiduspider/4.0; +http://www.baidu.com/search/spider.html)
User-agent: Mozilla/5.0 (compatible; Baiduspider/5.0; +http://www.baidu.com/search/spider.html)
User-agent: Baiduspider/2.0
User-agent: Baiduspider/3.0
User-agent: Baiduspider/4.0
User-agent: Baiduspider/5.0
Disallow: /
# Block SoGou
User-agent: Sogou Spider
Disallow: /
# Block Youdao
User-agent: YoudaoBot
Disallow: /
# Block Nikon JP Crawler
User-agent: gsa-crawler (Enterprise; T4-KNHH62CDKC2W3; gsa_manage@nikon-sys.co.jp)
Disallow: /
# Block MegaIndex.ru
User-agent: MegaIndex.ru/2.0
Disallow: /
User-agent: MegaIndex.ru
Disallow: /
User-agent: megaIndex.ru
Disallow: /
User-agent: wp_is_mobile
Disallow: /
</pre>
|
#robots.txt #로봇배제표준 #로봇배제프로토콜 #접근방지 #접근제한 #메타태그 #크롤링 #보안 #검색엔진