<?php // phpcs:ignore WordPress.Files.FileName.InvalidClassFileName
/**
 * Generate sitemap files in base XML as well as some namespace extensions.
 *
 * This module generates two different base sitemaps.
 *
 * The basic sitemap is updated regularly by wp-cron. It is stored in the
 * database and retrieved when requested. This sitemap aims to include canonical
 * URLs for all published content and abide by the sitemap spec. This is the root
 * of a tree of sitemap and sitemap index xml files, depending on the number of URLs.
 *
 * By default the sitemap contains published posts of type 'post' and 'page', as
 * well as the home url. To include other post types use the 'jetpack_sitemap_post_types'
 * filter.
 *
 * @link https://www.sitemaps.org/protocol.html Base sitemaps protocol.
 * @link https://support.google.com/webmasters/answer/178636 Image sitemap extension.
 * @link https://developers.google.com/webmasters/videosearch/sitemaps Video sitemap extension.
 *
 * The news sitemap is generated on the fly when requested. It does not aim for
 * completeness, instead including at most 1000 of the most recent published posts
 * from the previous 2 days, per the news-sitemap spec.
 *
 * @link https://support.google.com/webmasters/answer/74288 News sitemap extension.
 *
 * @package automattic/jetpack
 * @since 4.8.0 Remove 1000 post limit.
 */
// Refuse to run when the file is loaded outside of WordPress.
// NOTE(review): the guard body was missing from this copy of the file; `exit( 0 )` is
// the conventional body for an ABSPATH guard -- confirm against version control.
if ( ! defined( 'ABSPATH' ) ) {
	exit( 0 );
}

/* Include all of the sitemap subclasses. */
require_once __DIR__ . '/sitemap-constants.php';
require_once __DIR__ . '/sitemap-buffer.php';
require_once __DIR__ . '/sitemap-buffer-fallback.php';
require_once __DIR__ . '/sitemap-buffer-xmlwriter.php';
require_once __DIR__ . '/sitemap-buffer-page-xmlwriter.php';
require_once __DIR__ . '/sitemap-buffer-image-xmlwriter.php';
require_once __DIR__ . '/sitemap-buffer-video-xmlwriter.php';
require_once __DIR__ . '/sitemap-buffer-news-xmlwriter.php';
require_once __DIR__ . '/sitemap-buffer-master-xmlwriter.php';
require_once __DIR__ . '/sitemap-buffer-factory.php';
require_once __DIR__ . '/sitemap-stylist.php';
require_once __DIR__ . '/sitemap-librarian.php';
require_once __DIR__ . '/sitemap-finder.php';
require_once __DIR__ . '/sitemap-builder.php';

// The logger is only loaded when debugging is enabled.
if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
	require_once __DIR__ . '/sitemap-logger.php';
}
// phpcs:disable Universal.Files.SeparateFunctionsFromOO.Mixed -- TODO: Move classes to appropriately-named class files.
/**
 * Governs the generation, storage, and serving of sitemaps.
 *
 * @phan-constructor-used-for-side-effects
 */
class Jetpack_Sitemap_Manager {
// NOTE(review): the three property declarations below were missing from this copy of
// the file (only their doc text survived). `private` visibility is assumed -- confirm
// against version control before merging.

/**
 * Librarian object for storing and retrieving sitemap data.
 *
 * @see Jetpack_Sitemap_Librarian
 * @var Jetpack_Sitemap_Librarian $librarian Librarian object for storing and retrieving sitemap data.
 */
private $librarian;

/**
 * Logger object for reporting debug messages.
 *
 * The constructor only assigns this when WP_DEBUG is strictly true.
 *
 * @see Jetpack_Sitemap_Logger
 * @var Jetpack_Sitemap_Logger $logger Logger object for reporting debug messages.
 */
private $logger;

/**
 * Finder object for handling sitemap URIs.
 *
 * @see Jetpack_Sitemap_Finder
 * @var Jetpack_Sitemap_Finder $finder Finder object for handling sitemap URIs.
 */
private $finder;
/**
 * Construct a new Jetpack_Sitemap_Manager.
 */
public function __construct() {
// NOTE(review): this copy of the constructor is structurally incomplete -- closing braces,
// the heads of the hook-registration calls (function name + hook name) and most trailing
// arguments are missing. The notes below mark each gap; restore from version control.
// Collaborators: the librarian stores/retrieves sitemap data, the finder handles sitemap URIs.
$this->librarian = new Jetpack_Sitemap_Librarian();
$this->finder = new Jetpack_Sitemap_Finder();
// The logger is only instantiated when WP_DEBUG is defined and strictly true.
if ( defined( 'WP_DEBUG' ) && ( true === WP_DEBUG ) ) {
$this->logger = new Jetpack_Sitemap_Logger();
// NOTE(review): the closing brace of the WP_DEBUG check is missing here.
// Add callback for sitemap URL handler.
// NOTE(review): call head and hook name missing. The surviving last argument evaluates to
// 100 when IS_WPCOM is defined and truthy, otherwise 10 -- presumably the hook priority.
array( $this, 'callback_action_catch_sitemap_urls' ),
defined( 'IS_WPCOM' ) && IS_WPCOM ? 100 : 10
// Add generator to wp_cron task list.
$this->schedule_sitemap_generation();
// Add sitemap to robots.txt.
// NOTE(review): call head, hook name and the argument after the callback are missing.
array( $this, 'callback_action_do_robotstxt' ),
// The news sitemap is cached; here we add a callback to
// flush the cached news sitemap when a post is published.
// NOTE(review): call head, hook name and the argument after the callback are missing.
array( $this, 'callback_action_flush_news_sitemap_cache' ),
// In case we need to purge all sitemaps, we do this.
// NOTE(review): call head missing; 'jetpack_sitemaps_purge_data' is the hook name and
// no argument follows the callback.
'jetpack_sitemaps_purge_data',
array( $this, 'callback_action_purge_data' )
// NOTE(review): the next four lines are block-comment text whose delimiters were lost.
* Module parameters are stored as options in the database.
* This allows us to avoid having to process all of init
* before serving the sitemap data. The following actions
* process and store these filters.
// Process filters and store location string for sitemap.
// NOTE(review): call head, hook name and the argument after the callback are missing,
// as is the closing brace of the constructor.
array( $this, 'callback_action_filter_sitemap_location' ),
/**
 * Echo a raw string of given content-type.
 *
 * Sends the Content-Type header, flags the main query as a 'sitemap' feed,
 * and then either reports that no sitemap exists (when the content is the
 * empty string) or echoes the content and ends the request.
 *
 * @param string $the_content_type The content type to be served.
 * @param string $the_content      The string to be echoed.
 */
private function serve_raw_and_die( $the_content_type, $the_content ) {
	header( 'Content-Type: ' . $the_content_type . '; charset=UTF-8' );

	// The global query object is modified below, so it has to be imported into scope.
	global $wp_query;
	$wp_query->is_feed = true;
	set_query_var( 'feed', 'sitemap' );

	if ( '' === $the_content ) {
		$error = __( 'No sitemap found. Please try again later.', 'jetpack' );

		// Users who can manage options also get told when the next build is scheduled.
		if ( current_user_can( 'manage_options' ) ) {
			$next = human_time_diff( wp_next_scheduled( 'jp_sitemap_cron_hook' ) );
			/* translators: %s is a human_time_diff until next sitemap generation. */
			$error = sprintf( __( 'No sitemap found. The system will try to build it again in %s.', 'jetpack' ), $next );
		}

		// NOTE(review): the head of this call and its last argument were missing from this
		// copy of the file; wp_die() with a 404 response is a reconstruction -- confirm
		// against version control.
		wp_die(
			esc_html( $error ),
			esc_html__( 'Sitemaps', 'jetpack' ),
			array(
				'response' => 404,
			)
		);
	}

	echo $the_content; // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- All content created by Jetpack.

	// NOTE(review): the terminating statement was missing from this copy; the method name
	// says the request ends here -- confirm the exact exit call against version control.
	die( 0 );
}
/**
 * Callback to intercept sitemap url requests and serve sitemap files.
 */
public function callback_action_catch_sitemap_urls() {
// NOTE(review): this copy of the method is structurally incomplete. Missing throughout:
// closing braces and parentheses, the opener of the $regex array, the first argument
// (content type) of every serve_raw_and_die() call, and the second argument of several
// get_sitemap_text() calls. Restore from version control rather than by hand.
//
// Overall flow as far as it is visible: read REQUEST_URI, ask the finder to recognise a
// sitemap name in it, then compare that name against fixed file names and the numbered
// patterns below, serving the matching stored or generated document.
// Regular expressions for sitemap URL routing.
// NOTE(review): the `$regex = array(` opener is missing; the entries are read later as
// $regex['sitemap'], $regex['index'], etc. Each pattern requires a numeric suffix that
// starts with 1-9, so a suffix of 0 or one with leading zeros does not match.
'sitemap' => '/^sitemap-[1-9][0-9]*\.xml$/',
'index' => '/^sitemap-index-[1-9][0-9]*\.xml$/',
'image' => '/^image-sitemap-[1-9][0-9]*\.xml$/',
'image-index' => '/^image-sitemap-index-[1-9][0-9]*\.xml$/',
'video' => '/^video-sitemap-[1-9][0-9]*\.xml$/',
'video-index' => '/^video-sitemap-index-[1-9][0-9]*\.xml$/',
// The raw path(+query) of the requested URI.
if ( isset( $_SERVER['REQUEST_URI'] ) ) { // WPCS: Input var okay.
$raw_uri = sanitize_text_field(
wp_unslash( $_SERVER['REQUEST_URI'] ) // WPCS: Input var okay.
// NOTE(review): the end of this statement and whatever value $raw_uri takes when
// REQUEST_URI is not set are not visible here.
$request = $this->finder->recognize_sitemap_uri( $raw_uri );
// Nothing is served unless the finder reported a sitemap name.
if ( isset( $request['sitemap_name'] ) ) {
// NOTE(review): the next two lines are docblock text whose delimiters were lost.
* Filter the content type used to serve the sitemap XML files.
* @param string $xml_content_type By default, it's 'text/xml'.
$xml_content_type = apply_filters( 'jetpack_sitemap_content_type', 'text/xml' );
// Catch master sitemap xml.
if ( 'sitemap.xml' === $request['sitemap_name'] ) {
// NOTE(review): the second argument of get_sitemap_text() is missing here.
$sitemap_content = $this->librarian->get_sitemap_text(
jp_sitemap_filename( JP_MASTER_SITEMAP_TYPE, 0 ),
// if there is no master sitemap yet, let's just return an empty sitemap with a short TTL instead of a 404.
if ( empty( $sitemap_content ) ) {
$builder = new Jetpack_Sitemap_Builder();
$sitemap_content = $builder->empty_sitemap_xml();
// NOTE(review): both arguments of this call are missing; presumably the content type
// and $sitemap_content -- confirm.
$this->serve_raw_and_die(
// Catch sitemap xsl.
if ( 'sitemap.xsl' === $request['sitemap_name'] ) {
// NOTE(review): the content-type argument is missing from this and the other XSL routes.
$this->serve_raw_and_die(
Jetpack_Sitemap_Stylist::sitemap_xsl()
// Catch sitemap index xsl.
if ( 'sitemap-index.xsl' === $request['sitemap_name'] ) {
$this->serve_raw_and_die(
Jetpack_Sitemap_Stylist::sitemap_index_xsl()
// Catch image sitemap xsl.
if ( 'image-sitemap.xsl' === $request['sitemap_name'] ) {
$this->serve_raw_and_die(
Jetpack_Sitemap_Stylist::image_sitemap_xsl()
// Catch video sitemap xsl.
if ( 'video-sitemap.xsl' === $request['sitemap_name'] ) {
$this->serve_raw_and_die(
Jetpack_Sitemap_Stylist::video_sitemap_xsl()
// Catch news sitemap xml.
// The news sitemap is produced by the builder at request time, not read from the librarian.
if ( 'news-sitemap.xml' === $request['sitemap_name'] ) {
$sitemap_builder = new Jetpack_Sitemap_Builder();
$this->serve_raw_and_die(
$sitemap_builder->news_sitemap_xml()
// Catch news sitemap xsl.
if ( 'news-sitemap.xsl' === $request['sitemap_name'] ) {
$this->serve_raw_and_die(
Jetpack_Sitemap_Stylist::news_sitemap_xsl()
// Catch sitemap xml.
// NOTE(review): from here on, the numbered routes read stored text from the librarian.
// The sitemap-type argument is missing for the non-index routes (page, image, video).
if ( preg_match( $regex['sitemap'], $request['sitemap_name'] ) ) {
$this->serve_raw_and_die(
$this->librarian->get_sitemap_text(
$request['sitemap_name'],
// Catch sitemap index xml.
if ( preg_match( $regex['index'], $request['sitemap_name'] ) ) {
$this->serve_raw_and_die(
$this->librarian->get_sitemap_text(
$request['sitemap_name'],
JP_PAGE_SITEMAP_INDEX_TYPE
// Catch image sitemap xml.
if ( preg_match( $regex['image'], $request['sitemap_name'] ) ) {
$this->serve_raw_and_die(
$this->librarian->get_sitemap_text(
$request['sitemap_name'],
// Catch image sitemap index xml.
if ( preg_match( $regex['image-index'], $request['sitemap_name'] ) ) {
$this->serve_raw_and_die(
$this->librarian->get_sitemap_text(
$request['sitemap_name'],
JP_IMAGE_SITEMAP_INDEX_TYPE
// Catch video sitemap xml.
if ( preg_match( $regex['video'], $request['sitemap_name'] ) ) {
$this->serve_raw_and_die(
$this->librarian->get_sitemap_text(
$request['sitemap_name'],
// Catch video sitemap index xml.
if ( preg_match( $regex['video-index'], $request['sitemap_name'] ) ) {
$this->serve_raw_and_die(
$this->librarian->get_sitemap_text(
$request['sitemap_name'],
JP_VIDEO_SITEMAP_INDEX_TYPE
/**
 * Callback for adding sitemap-interval to the list of schedules.
 *
 * Registers (or overwrites) the 'sitemap-interval' entry, whose period is
 * JP_SITEMAP_INTERVAL.
 *
 * @param array $schedules The array of WP_Cron schedules.
 * @return array The updated array of WP_Cron schedules.
 */
public function callback_add_sitemap_schedule( $schedules ) {
	$schedules['sitemap-interval'] = array(
		'interval' => JP_SITEMAP_INTERVAL,
		'display'  => __( 'Sitemap Interval', 'jetpack' ),
	);

	return $schedules;
}
/**
 * Callback handler for sitemap cron hook.
 *
 * Creates a fresh sitemap builder and asks it to update the sitemap.
 */
public function callback_sitemap_cron_hook() {
	$sitemap_builder = new Jetpack_Sitemap_Builder();
	$sitemap_builder->update_sitemap();
}
/**
 * Add actions to schedule sitemap generation.
 * Should only be called once, in the constructor.
 */
private function schedule_sitemap_generation() {
	// Make the 'sitemap-interval' recurrence available to WP-Cron.
	add_filter( 'cron_schedules', array( $this, 'callback_add_sitemap_schedule' ) ); // phpcs:ignore WordPress.WP.CronInterval.ChangeDetected

	// NOTE(review): the head of this registration was missing from this copy of the file.
	// The hook name is taken from the wp_next_scheduled() check below -- confirm against
	// version control.
	add_action(
		'jp_sitemap_cron_hook',
		array( $this, 'callback_sitemap_cron_hook' )
	);

	// Only schedule the recurring event if it is not already scheduled.
	if ( ! wp_next_scheduled( 'jp_sitemap_cron_hook' ) ) {
		/**
		 * Filter the delay in seconds until sitemap generation cron job is started.
		 *
		 * This filter allows a site operator or hosting provider to potentially spread out sitemap generation for a
		 * lot of sites over time. By default, it will be randomly done over 15 minutes.
		 *
		 * @param int $delay Time to delay in seconds.
		 */
		$delay = apply_filters( 'jetpack_sitemap_generation_delay', MINUTE_IN_SECONDS * wp_rand( 1, 15 ) ); // Randomly space it out to start within next fifteen minutes.

		// NOTE(review): the scheduling call itself was missing from this copy ($delay was
		// computed but never used). Reconstructed from the hook and schedule names used in
		// this class -- confirm against version control.
		wp_schedule_event( time() + $delay, 'sitemap-interval', 'jp_sitemap_cron_hook' );
	}
}
/**
 * Callback to add sitemap to robots.txt.
 *
 * Echoes one `Sitemap:` line for the main sitemap and one for the news
 * sitemap. Each line is only printed when its filter chain yields boolean
 * true (the comparison is strict), so a filter returning a merely truthy
 * value such as 1 suppresses the line.
 */
public function callback_action_do_robotstxt() {

	/**
	 * Filter whether to make the default sitemap discoverable to robots or not. Default true.
	 *
	 * Deprecated name, kept for backwards compatibility; replaced by
	 * 'jetpack_sitemap_include_in_robotstxt'.
	 *
	 * @param bool $discover_sitemap Make default sitemap discoverable to robots.
	 */
	$discover_sitemap = apply_filters_deprecated( 'jetpack_sitemap_generate', array( true ), 'jetpack-7.4.0', 'jetpack_sitemap_include_in_robotstxt' );

	/**
	 * Filter whether to make the default sitemap discoverable to robots or not. Default true.
	 *
	 * @param bool $discover_sitemap Make default sitemap discoverable to robots.
	 */
	$discover_sitemap = apply_filters( 'jetpack_sitemap_include_in_robotstxt', $discover_sitemap );

	if ( true === $discover_sitemap ) {
		$sitemap_url = $this->finder->construct_sitemap_url( 'sitemap.xml' );
		echo 'Sitemap: ' . esc_url( $sitemap_url ) . "\n";
	}

	/**
	 * Filter whether to make the news sitemap discoverable to robots or not. Default true.
	 *
	 * Deprecated name, kept for backwards compatibility; replaced by
	 * 'jetpack_news_sitemap_include_in_robotstxt'.
	 *
	 * @param bool $discover_news_sitemap Make default news sitemap discoverable to robots.
	 */
	$discover_news_sitemap = apply_filters_deprecated( 'jetpack_news_sitemap_generate', array( true ), 'jetpack-7.4.0', 'jetpack_news_sitemap_include_in_robotstxt' );

	/**
	 * Filter whether to make the news sitemap discoverable to robots or not. Default true.
	 *
	 * @param bool $discover_news_sitemap Make default news sitemap discoverable to robots.
	 */
	$discover_news_sitemap = apply_filters( 'jetpack_news_sitemap_include_in_robotstxt', $discover_news_sitemap );

	if ( true === $discover_news_sitemap ) {
		$news_sitemap_url = $this->finder->construct_sitemap_url( 'news-sitemap.xml' );
		echo 'Sitemap: ' . esc_url( $news_sitemap_url ) . "\n";
	}
}
* Callback to delete the news sitemap cache.