Create `robots.txt` for your Gatsby site on build.
yarn add gatsby-plugin-robots-txt
or
npm install --save gatsby-plugin-robots-txt
gatsby-config.js
module.exports = {
siteMetadata: {
siteUrl: 'https://www.example.com'
},
plugins: ['gatsby-plugin-robots-txt']
};
This plugin uses `generate-robotstxt` to generate the content of `robots.txt`, and it has the following options:
| Name | Type | Default | Description |
| --- | --- | --- | --- |
| `host` | `String` | `${siteMetadata.siteUrl}` | Host of your site |
| `sitemap` | `String` / `String[]` | `${siteMetadata.siteUrl}/sitemap/sitemap-index.xml` | Path(s) to `sitemap.xml` |
| `policy` | `Policy[]` | `[]` | List of `Policy` rules |
| `configFile` | `String` | `undefined` | Path to external config file |
| `output` | `String` | `/robots.txt` | Path where to create the `robots.txt` |
gatsby-config.js
module.exports = {
plugins: [
{
resolve: 'gatsby-plugin-robots-txt',
options: {
host: 'https://www.example.com',
sitemap: 'https://www.example.com/sitemap.xml',
policy: [{userAgent: '*', allow: '/'}]
}
}
]
};
env-option
gatsby-config.js
module.exports = {
plugins: [
{
resolve: 'gatsby-plugin-robots-txt',
options: {
host: 'https://www.example.com',
sitemap: 'https://www.example.com/sitemap.xml',
env: {
development: {
policy: [{userAgent: '*', disallow: ['/']}]
},
production: {
policy: [{userAgent: '*', allow: '/'}]
}
}
}
}
]
};
The `env` key will be taken from `process.env.GATSBY_ACTIVE_ENV` first (see Gatsby Environment Variables for more information on this variable), falling back to `process.env.NODE_ENV`. When neither is available, it defaults to `development`.
You can resolve the `env` key yourself by using the `resolveEnv` function:
gatsby-config.js
module.exports = {
plugins: [
{
resolve: 'gatsby-plugin-robots-txt',
options: {
host: 'https://www.example.com',
sitemap: 'https://www.example.com/sitemap.xml',
resolveEnv: () => process.env.GATSBY_ENV,
env: {
development: {
policy: [{userAgent: '*', disallow: ['/']}]
},
production: {
policy: [{userAgent: '*', allow: '/'}]
}
}
}
}
]
};
configFile-option
You can use the
configFile option to set specific external configuration:
gatsby-config.js
module.exports = {
plugins: [
{
resolve: 'gatsby-plugin-robots-txt',
options: {
configFile: 'robots-txt.config.js'
}
}
]
};
robots-txt.config.js
module.exports = {
host: 'https://www.example.com',
sitemap: 'https://www.example.com/sitemap.xml',
policy: [{userAgent: '*'}]
};
If you would like to disable crawlers for deploy-previews you can use the following snippet:
gatsby-config.js
const {
NODE_ENV,
URL: NETLIFY_SITE_URL = 'https://www.example.com',
DEPLOY_PRIME_URL: NETLIFY_DEPLOY_URL = NETLIFY_SITE_URL,
CONTEXT: NETLIFY_ENV = NODE_ENV
} = process.env;
const isNetlifyProduction = NETLIFY_ENV === 'production';
const siteUrl = isNetlifyProduction ? NETLIFY_SITE_URL : NETLIFY_DEPLOY_URL;
module.exports = {
siteMetadata: {
siteUrl
},
plugins: [
{
resolve: 'gatsby-plugin-robots-txt',
options: {
resolveEnv: () => NETLIFY_ENV,
env: {
production: {
policy: [{userAgent: '*'}]
},
'branch-deploy': {
policy: [{userAgent: '*', disallow: ['/']}],
sitemap: null,
host: null
},
'deploy-preview': {
policy: [{userAgent: '*', disallow: ['/']}],
sitemap: null,
host: null
}
}
}
}
]
};
query-option
By default the site URL will come from the Gatsby node `site.siteMetadata.siteUrl`. Like in Gatsby's sitemap plugin, an optional GraphQL query can be used to provide a different value from another data source, as long as it returns the same shape:
gatsby-config.js
module.exports = {
plugins: [
{
resolve: 'gatsby-plugin-robots-txt',
options: {
query: `{
site: MyCustomDataSource {
siteMetadata {
siteUrl
}
}
}`
}
}
]
};
Easy to use, does what it says. The configuration is also conventional - right in the config.js file.