Integrations / Platforms / WordPress / Zero-downtime Reindexing
Feb. 18, 2019

Zero-downtime Reindexing

Before going live, you may want to refactor your reindexing strategy to ensure reindexing creates no downtime. The previous indexing section went over the best way to get your data indexed. It’s very easy to understand, and it uses less of your records quota.

To reindex everything without any downtime, you’ll want to use iterators. This will create a temporary index, index all your data, and then rename the temporary index to replace the production one. This last operation, renaming, is atomic, so each user searching the old index will still get their results from it, and all new requests will use the new index.

Reindexing records

To make reindexing use only one atomic operation, we’ll modify the reindex_posts command and:

  • Extract the query loop to an iterator
  • Use replaceAllObjects instead of saveObjects.

Create this new Iterator class

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/**
 * Iterates over the posts matched by a WP_Query, loading them one page at a
 * time and yielding each post converted to an Algolia record (an array).
 *
 * Pages are fetched lazily from valid(): a new WP_Query is only run once the
 * posts of the current page have all been consumed.
 */
class Algolia_Post_Iterator implements Iterator {
    /**
     * Query arguments applied to every page query; always contains 'post_type'.
     *
     * @var array
     */
    private $queryArgs;

    /**
     * Position of the current post inside the currently loaded page.
     *
     * @var int
     */
    private $key = 0;

    /**
     * Number of the last page requested; 0 means no page was requested yet.
     *
     * @var int
     */
    private $paged = 0;

    /**
     * Posts of the currently loaded page.
     *
     * @var WP_Post[]
     */
    private $posts = [];

    /**
     * Post type being iterated; also used to build the '{type}_to_record' filter name.
     *
     * @var string
     */
    private $type;

    /**
     * @param string $type      Post type to iterate over.
     * @param array  $queryArgs Extra WP_Query arguments. A 'post_type' entry
     *                          here is ignored: $type always takes precedence.
     */
    public function __construct($type, array $queryArgs = []) {
        $this->type = $type;
        // With the array union operator the left operand wins on duplicate keys.
        $this->queryArgs = ['post_type' => $type] + $queryArgs;
    }

    /**
     * Returns the current post serialized as a record.
     *
     * @return array
     */
    // The attribute below silences the PHP 8.1+ tentative-return-type
    // deprecation; on PHP < 8 the line is parsed as a plain comment.
    #[\ReturnTypeWillChange]
    public function current() {
        return $this->serialize($this->posts[$this->key]);
    }

    /**
     * Advances to the next post of the current page.
     * Loading the following page, if needed, is done by valid().
     */
    #[\ReturnTypeWillChange]
    public function next() {
        $this->key++;
    }

    /**
     * Returns the position of the current post within the current page.
     *
     * The position restarts at 0 on every page, so keys repeat across pages.
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key() {
        return $this->key;
    }

    /**
     * Tells whether a current post exists, fetching the next page when the
     * current one is exhausted.
     *
     * @return bool False once a page query returns no posts.
     */
    #[\ReturnTypeWillChange]
    public function valid() {
        if (isset($this->posts[$this->key])) {
            return true;
        }

        // Current page is exhausted (or nothing was loaded yet): query the next page.
        $this->paged++;
        $query = new WP_Query(['paged' => $this->paged] + $this->queryArgs);

        if (! $query->have_posts()) {
            return false;
        }

        $this->posts = $query->posts;
        $this->key = 0;

        return true;
    }

    /**
     * Resets the iterator so the next valid() call starts again from page 1.
     */
    #[\ReturnTypeWillChange]
    public function rewind() {
        $this->key = 0;
        $this->paged = 0;
        $this->posts = [];
    }

    /**
     * Converts a post to a record through the '{type}_to_record' filter and
     * guarantees the record carries an objectID.
     *
     * @param WP_Post $post
     *
     * @return array
     */
    private function serialize( WP_Post $post ) {
        $record = (array) apply_filters($this->type.'_to_record', $post);

        // Fall back to "<post_type>#<ID>" when the filter did not set an objectID.
        if (! isset($record['objectID'])) {
            $record['objectID'] = implode('#', [$post->post_type, $post->ID]);
        }

        return $record;
    }
}

Now the reindexing command is a lot simpler. We specify the query arguments to use, pass them to the iterator, and then pass this iterator to replaceAllObjects.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/**
 * Replaces every record of a post type's index in a single operation.
 *
 * Published posts are read 100 per page through Algolia_Post_Iterator and
 * handed to replaceAllObjects on the index named by the
 * 'algolia_index_name' filter.
 *
 * @param array $args       Positional arguments (not used).
 * @param array $assoc_args Associative arguments; 'type' picks the post type
 *                          and falls back to 'post' when absent.
 */
public function reindex_post_atomic( $args, $assoc_args ) {
    global $algolia;

    $type = 'post';
    if (isset($assoc_args['type'])) {
        $type = $assoc_args['type'];
    }

    $indexName = apply_filters('algolia_index_name', $type);
    $targetIndex = $algolia->initIndex($indexName);

    $records = new Algolia_Post_Iterator($type, [
        'posts_per_page' => 100,
        'post_status' => 'publish',
    ]);

    $targetIndex->replaceAllObjects($records);

    WP_CLI::success("Reindexed $type posts in Algolia");
}

Reindexing records and configuration

The replaceAllObjects method is actually a meta function: it creates a temporary index that is then renamed to the real index name. It hides a lot of complexity if you want to replace your records, but it doesn’t play nicely with other operations, such as setSettings.

If you want to resend settings, synonyms, and query rules when reindexing, you need to spell out each step yourself, as shown below in the reindex_post_atomic_with_config method.

Note that we’re using the same iterator as the previous example.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/**
 * Replaces every record of a post type's index in a single operation.
 *
 * Published posts are read 100 per page through Algolia_Post_Iterator and
 * handed to replaceAllObjects on the index named by the
 * 'algolia_index_name' filter.
 *
 * @param array $args       Positional arguments (not used).
 * @param array $assoc_args Associative arguments; 'type' picks the post type
 *                          and falls back to 'post' when absent.
 */
public function reindex_post_atomic( $args, $assoc_args ) {
    global $algolia;

    $type = 'post';
    if (isset($assoc_args['type'])) {
        $type = $assoc_args['type'];
    }

    $indexName = apply_filters('algolia_index_name', $type);
    $targetIndex = $algolia->initIndex($indexName);

    $records = new Algolia_Post_Iterator($type, [
        'posts_per_page' => 100,
        'post_status' => 'publish',
    ]);

    $targetIndex->replaceAllObjects($records);

    WP_CLI::success("Reindexed $type posts in Algolia");
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/**
 * Reindexes all published posts of one type together with the index
 * configuration, by building a temporary index and moving it over the
 * final one.
 *
 * Settings, synonyms and rules are read from the 'get_{type}_settings',
 * 'get_{type}_synonyms' and 'get_{type}_rules' filters; each is only sent
 * when the filter returns a non-empty value.
 *
 * @param array $args       Positional arguments (not used).
 * @param array $assoc_args Associative arguments; 'type' picks the post type
 *                          and falls back to 'post' when absent.
 */
public function reindex_post_atomic_with_config( $args, $assoc_args ) {
    global $algolia;

    $type = isset($assoc_args['type']) ? $assoc_args['type'] : 'post';

    // Throwaway name for the index that is filled before being moved into place.
    $temporaryName = sha1($type.time().mt_rand(0, 100));
    $finalIndexName = apply_filters('algolia_index_name', $type);
    $index = $algolia->initIndex($temporaryName);

    // Configuration is sent before the records.
    $settings = (array) apply_filters('get_'.$type.'_settings', []);
    // An index that has replicas cannot be moved, so never set them on the temporary index.
    unset($settings['replicas']);
    if ($settings) {
        $index->setSettings($settings);
    }

    $synonyms = (array) apply_filters('get_'.$type.'_synonyms', []);
    if ($synonyms) {
        $index->saveSynonyms($synonyms);
    }

    // Hook name follows the same 'get_{type}_*' pattern as settings and synonyms.
    $rules = (array) apply_filters('get_'.$type.'_rules', []);
    if ($rules) {
        $index->saveRules($rules);
    }

    $queryArgs = [
        'posts_per_page' => 100,
        'post_status' => 'publish',
    ];

    $iterator = new Algolia_Post_Iterator($type, $queryArgs);
    $index->saveObjects($iterator);

    // Swap the fully built temporary index in under the final name.
    $algolia->moveIndex($temporaryName, $finalIndexName);

    WP_CLI::success("Reindexed $type posts in Algolia");
}

Note that in this case, the replicas are unset before we call setSettings, because an index cannot be moved if it has replicas.

One good practice when reindexing is to always set the settings first; it will make the whole reindex faster.

Did you find this page helpful?