In this tutorial we will review how to export the data of an index to a file.
To accomplish this, we will leverage the browse
feature of Algolia.
There is currently no way to export your index data directly from your Algolia Dashboard as indices can potentially be quite large (in the tens of gigabytes for example).
Exporting the index
The browse
method, detailed in the Browse index section, allows us to retrieve results beyond the 1,000 default limit.
After retrieving them, we’ll need to save them to a file.
If you’re using JavaScript, you can leverage the browseAll
method that will crawl the whole index and emit
events whenever a new chunk of records is fetched.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Load the Algolia PHP client through Composer's autoloader.
require __DIR__ . '/vendor/autoload.php';
// Without Composer, include the library manually instead:
// require_once 'path/to/algoliasearch.php';

// The admin API key is required to browse the full index.
$client = new \AlgoliaSearch\Client('YourApplicationID', 'YourAdminAPIKey');
$index = $client->initIndex('your_index_name');

// browse('') pages through every record, beyond the 1,000-hit search limit.
$records = [];
foreach ($index->browse('') as $record) {
    $records[] = $record;
}

// Serialize all records to a JSON file on disk.
file_put_contents('your_filename', json_encode($records));
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
require 'json'
require 'algoliasearch'

# The admin API key is required to browse the full index.
Algolia.init(application_id: 'YourApplicationID',
             api_key: 'YourAdminAPIKey')
index = Algolia::Index.new('your_index_name')

# browse yields every record, beyond the 1,000-hit search limit.
records = []
index.browse do |record|
  records << record
end

# Serialize all records to a JSON file on disk.
File.write('your_filename', records.to_json)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
// const algoliasearch = require('algoliasearch');
// const algoliasearch = require('algoliasearch/reactnative');
// const algoliasearch = require('algoliasearch/lite');
// import algoliasearch from 'algoliasearch';
//
// or just use algoliasearch if you are using a <script> tag
// if you are using AMD module loader, algoliasearch will not be defined in window,
// but in the AMD modules of the page
const fs = require('fs');

// The admin API key is required to browse the full index.
const client = algoliasearch('YourApplicationID', 'YourAdminAPIKey');
const index = client.initIndex('your_index_name');

// browseAll() crawls the whole index, emitting a `result` event per chunk
// of records, then `end` (or `error`) when the crawl finishes.
const browser = index.browseAll();
const hits = [];

browser.on('result', content => {
  // Accumulate each fetched chunk of records.
  hits.push(...content.hits);
});

browser.on('end', () => {
  console.log('Finished!');
  console.log('We got %d hits', hits.length);
  // Dump every record, pretty-printed, to browse.json.
  fs.writeFile('browse.json', JSON.stringify(hits, null, 2), 'utf-8', err => {
    if (err) throw err;
    console.log('Your index has been exported!');
  });
});

browser.on('error', err => {
  throw err;
});
|
1
2
3
4
5
6
7
8
9
10
11
12
13
import json

from algoliasearch.search_client import SearchClient

# The admin API key is required to browse the full index.
client = SearchClient.create('YourApplicationID', 'YourAdminAPIKey')
index = client.init_index('your_index_name')

# browse_objects iterates every record, beyond the 1,000-hit search limit;
# an empty query matches the whole index.
hits = list(index.browse_objects({'query': ''}))

# Serialize all records to a JSON file on disk.
with open('your_filename', 'w') as f:
    json.dump(hits, f)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// The admin API key is required to browse the full index.
SearchClient client = new SearchClient("YourApplicationID", "YourAdminAPIKey");
SearchIndex index = client.InitIndex("your_index_name");

// Browse pages through every record, beyond the 1,000-hit search limit.
var browseResponse = index.Browse<JObject>(new BrowseIndexQuery());

var records = new JArray();
foreach (var record in browseResponse)
{
    records.Add(record);
}

// Serialize all records to a JSON file on disk.
using (StreamWriter file = File.CreateText(@"your_filename.json"))
{
    new JsonSerializer().Serialize(file, records);
}
|
1
2
3
4
5
6
7
8
9
10
11
// The admin API key is required to browse the full index.
SearchClient client =
    DefaultSearchClient.create("YourApplicationID", "YourAdminAPIKey");
SearchIndex<Actor> index = client.initIndex("actors", Actor.class);

// Bug fix: the original iterated IndexIterable<Employee> from a
// SearchIndex<Actor> — a type mismatch that does not compile. The record
// type must be consistent throughout.
// browse() pages through every record, beyond the 1,000-hit search limit.
IndexIterable<Actor> results = index.browse(new BrowseIndexQuery());
List<Actor> records = new ArrayList<>();
results.forEach(records::add);

// Serialize all records to a JSON file on disk.
ObjectMapper objectMapper = Defaults.getObjectMapper();
objectMapper.writeValue(new File("your_filename.json"), records);
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
| package main
import (
"encoding/json"
"io"
"io/ioutil"
"os"
"github.com/algolia/algoliasearch-client-go/algolia/search"
)
type Actor struct {
Name string `json:"name"`
Rating int `json:"rating"`
ImagePath string `json:"image_path"`
AlternativeName string `json:"alternative_name"`
ObjectID string `json:"objectID"`
}
func main() {
client := search.NewClient("YourApplicationID", "YourAdminAPIKey")
index := client.InitIndex("actors")
it, err := index.BrowseObjects()
if err != nil {
// error handling
}
var actors []Actor
var actor Actor
for {
_, err = it.Next(&actor)
if err != nil {
if err == io.EOF {
break
}
// error handling
}
actors = append(actors, actor)
}
data, err := json.Marshal(actors)
if err != nil {
// error handling
}
err = ioutil.WriteFile("actors_downloaded.json", data, os.ModePerm)
if err != nil {
// error handling
}
}
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import java.io.{File, PrintWriter}

import algolia.AlgoliaDsl._
import algolia.objects.Query
import algolia.responses.ObjectID
import algolia.{AlgoliaClient, AlgoliaSyncHelper}
import org.json4s.native.Serialization.write

import scala.concurrent.duration._
import scala.concurrent.{ExecutionContext, ExecutionContextExecutor}

// Records must extend ObjectID so the client can address them by objectID.
// (Fixed: the original had an illegal trailing comma before the comment.)
case class MyCaseClass(objectID: String /* , other fields ... */) extends ObjectID

object Main {
  def main(args: Array[String]): Unit = {
    implicit val ec: ExecutionContextExecutor = ExecutionContext.global
    // Dot-call instead of the discouraged postfix `10 seconds`.
    implicit val awaitDuration: FiniteDuration = 10.seconds

    // The admin API key is required to browse the full index.
    val client = new AlgoliaClient("YourApplicationID", "YourAdminAPIKey")
    val indexName = "your_index_name"

    // AlgoliaSyncHelper.browse pages synchronously through the whole index,
    // beyond the 1,000-hit search limit; an empty query matches every record.
    val helper = AlgoliaSyncHelper(client)

    // Bug fix: browse was parameterized with an unrelated type (`Show`) while
    // the result was typed Seq[MyCaseClass] — a type mismatch. It must
    // deserialize into the case class we collect.
    val records: Seq[MyCaseClass] =
      helper.browse[MyCaseClass](indexName, Query(query = Some("")))
        .flatten
        .toSeq

    // Serialize the records to JSON; close the writer even if write fails.
    val writer = new PrintWriter(new File("your_filename.json"))
    try writer.write(write(records))
    finally writer.close()
  }
}
|
1
2
3
4
5
6
// Page through the whole index (beyond the 1,000-hit search limit) and
// collect the JSON form of every record.
val records = index.browseObjects()
    .flatMap { response -> response.hits }
    .map { hit -> hit.json }

// Serialize all records and write the export to disk.
val json = Json.stringify(JsonObjectSerializer.list, records)
File("your_filename.json").writeText(json)
|
Note that we pass an empty query as the argument to the browse method
to indicate that we want to browse all records.
In cases of large indices, you may need to consider “chunking” the data.