/* 
 * Copyright (C) 2005  Network Applied Communication Laboratory Co., Ltd.
 *
 * This file is part of Rast.
 * See the file COPYING for redistribution information.
 *
 */

#include "rast/config.h"

#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>

#include <apr_tables.h>
#include <apr_hash.h>
#include <apr_strings.h>
#include <apr_ring.h>

#include "rast/text_index.h"
#include "rast/query.h"

#if !HAVE_FSEEKO && !defined(fseeko)
#define fseeko fseek
#endif
#if !HAVE_FTELLO && !defined(ftello)
#define ftello ftell
#endif

#define RARE_NGRAM_DB_MAX_VALUE_SIZE (5 * 3)

typedef struct free_list_entry_t {
    APR_RING_ENTRY(free_list_entry_t) link;
    rast_size_t block_no;
    rast_size_t block_count;
} free_list_entry_t;

typedef APR_RING_HEAD(free_list_head_t, free_list_entry_t) free_list_head_t;

typedef struct {
    apr_pool_t *pool;
    rast_size_t version;
    free_list_head_t *first;
    free_list_head_t *added;
    free_list_head_t *recycled;
} free_list_t;

static free_list_t *
free_list_create(apr_pool_t *pool)
{
    free_list_t *free_list;

    free_list = (free_list_t *) apr_palloc(pool, sizeof(free_list_t));
    free_list->pool = pool;
    free_list->version = 0;
    free_list->first =
        (free_list_head_t *) apr_palloc(pool, sizeof(free_list_head_t));
    APR_RING_INIT(free_list->first, free_list_entry_t, link);
    free_list->added =
        (free_list_head_t *) apr_palloc(pool, sizeof(free_list_head_t));
    APR_RING_INIT(free_list->added, free_list_entry_t, link);
    free_list->recycled =
        (free_list_head_t *) apr_palloc(pool, sizeof(free_list_head_t));
    APR_RING_INIT(free_list->recycled, free_list_entry_t, link);
    return free_list;
}

static void
free_list_clear(free_list_t *free_list)
{
    APR_RING_CONCAT(free_list->first, free_list->added,
                    free_list_entry_t, link);
    APR_RING_CONCAT(free_list->recycled, free_list->first,
                    free_list_entry_t, link);
}

static void
list_prepend(free_list_t *free_list,
             free_list_head_t *head,
             rast_size_t block_no, rast_size_t block_count)
{
    free_list_entry_t *entry;

    if (APR_RING_EMPTY(free_list->recycled, free_list_entry_t, link)) {
        entry = (free_list_entry_t *) apr_palloc(free_list->pool,
                                                 sizeof(free_list_entry_t));
    }
    else {
        entry = APR_RING_FIRST(free_list->recycled);
        APR_RING_REMOVE(entry, link);
    }
    entry->block_no = block_no;
    entry->block_count = block_count;
    APR_RING_INSERT_HEAD(head, entry, free_list_entry_t, link);
}

static void
free_list_add(free_list_t *free_list,
              rast_size_t block_no, rast_size_t block_count)
{
    list_prepend(free_list, free_list->added, block_no, block_count);
}

#define FREE_AREA_NOT_FOUND ((rast_size_t) -1)

static rast_size_t
free_list_search(free_list_t *free_list, rast_size_t block_count)
{
    free_list_entry_t *entry;
    free_list_entry_t *candidate = NULL;

    for (entry = APR_RING_FIRST(free_list->first);
         entry != APR_RING_SENTINEL(free_list->first, free_list_entry_t, link);
         entry = APR_RING_NEXT(entry, link)) {
        if (entry->block_count == block_count) {
            APR_RING_REMOVE(entry, link);
            APR_RING_INSERT_HEAD(free_list->recycled, entry,
                                 free_list_entry_t, link);
            return entry->block_no;
        }
        if (entry->block_count > block_count) {
            if (candidate == NULL ||
                entry->block_count < candidate->block_count) {
                candidate = entry;
            }
        }
    }
    if (candidate == NULL) {
        return FREE_AREA_NOT_FOUND;
    }
    else {
        rast_size_t block_no = candidate->block_no;

        candidate->block_no += block_count;
        candidate->block_count -= block_count;
        return block_no;
    }
}

typedef struct pos_list_entry_t {
    APR_RING_ENTRY(pos_list_entry_t) link;
    rast_pos_t pos;
} pos_list_entry_t;

typedef APR_RING_HEAD(pos_list_head_t, pos_list_entry_t) pos_list_head_t;

typedef struct cache_list_entry_t {
    APR_RING_ENTRY(cache_list_entry_t) link;
    rast_doc_id_t doc_id;
    pos_list_head_t *positions;
} cache_list_entry_t;

typedef APR_RING_HEAD(cache_list_head_t, cache_list_entry_t) cache_list_head_t;

struct rast_text_index_t {
    apr_pool_t *pool;
    int flags;
    rast_encoding_module_t *encoding_module;
    DB *ngram_db;
    DB *rare_ngram_db;
    FILE *pos_file;
    rast_size_t pos_block_size;
    apr_hash_t *cache;
    apr_pool_t *cache_pool;
    char *free_list_name;
    free_list_t *free_list;
    int is_native;
};

struct rast_text_indexer_t {
    apr_pool_t *pool;
    rast_text_index_t *index;
    rast_doc_id_t doc_id;
    rast_pos_t base_pos;
    apr_hash_t *position_table;
};

static rast_error_t *
write_rast_size_value(FILE *fp, rast_size_t i, int is_native)
{
    rast_size_t n;

    n = rast_fix_byte_order(i, is_native);
    if (fwrite(&n, sizeof(rast_size_t), 1, fp) < 1) {
        return os_error_to_rast_error(errno);
    }
    return RAST_OK;
}

static rast_error_t *
save_free_list(rast_text_index_t *index)
{
    rast_error_t *error = RAST_OK;
    free_list_t *free_list = index->free_list;
    free_list_entry_t *entry;
    FILE *fp;

    APR_RING_CONCAT(free_list->first, free_list->added,
                    free_list_entry_t, link);

    if ((fp = fopen(index->free_list_name, "w")) == NULL) {
        return os_error_to_rast_error(errno);
    }

    free_list->version++;
    error = write_rast_size_value(fp, free_list->version, index->is_native);
    if (error != RAST_OK) {
        fclose(fp);
        return error;
    }

    for (entry = APR_RING_FIRST(free_list->first);
         entry != APR_RING_SENTINEL(free_list->first, free_list_entry_t, link);
         entry = APR_RING_NEXT(entry, link)) {
        error = write_rast_size_value(fp, entry->block_no, index->is_native);
        if (error != RAST_OK) {
            break;
        }
        error = write_rast_size_value(fp, entry->block_count,
                                      index->is_native);
        if (error != RAST_OK) {
            break;
        }
    }
    fclose(fp);
    return error;
}

static rast_error_t *
read_rast_size_value(FILE *fp, rast_size_t *i, int is_native)
{
    rast_size_t n;

    if (fread(&n, sizeof(rast_size_t), 1, fp) < 1) {
        if (feof(fp)) {
            return rast_error(RAST_ERROR_EOF, NULL);
        }
        return os_error_to_rast_error(errno);
    }
    *i = rast_fix_byte_order(n, is_native);
    return RAST_OK;
}

static rast_error_t *
load_free_list(rast_text_index_t *index)
{
    rast_error_t *error = RAST_OK;
    FILE *fp;
    rast_size_t version;

    if ((fp = fopen(index->free_list_name, "r")) == NULL) {
        if (errno == ENOENT) {
            return RAST_OK;
        }
        return os_error_to_rast_error(errno);
    }

    error = read_rast_size_value(fp, &version, index->is_native);
    if (error != RAST_OK) {
        fclose(fp);
        return error;
    }
    if (index->free_list->version == version) {
        return RAST_OK;
    }
    free_list_clear(index->free_list);
    index->free_list->version = version;
    while (1) {
        rast_size_t block_no, block_count;

        error = read_rast_size_value(fp, &block_no, index->is_native);
        if (error != RAST_OK) {
            if (rast_error_is_eof(error)) {
                rast_error_destroy(error);
                error = RAST_OK;
            }
            break;
        }
        error = read_rast_size_value(fp, &block_count, index->is_native);
        if (error != RAST_OK) {
            break;
        }
        list_prepend(index->free_list, index->free_list->first,
                     block_no, block_count);
    }
    fclose(fp);
    return error;
}

static rast_error_t *
open_db(DB **dbp, const char *basename, const char *ext, int flags,
        DB_ENV *db_env, int lorder, apr_pool_t *pool)
{
    DB *db;
    uint32_t db_flags;
    char *filename;
    int dberr;

    filename = apr_pstrcat(pool, basename, ext, NULL);
    dberr = db_create(&db, db_env, 0);
    if (dberr != 0) {
        return db_error_to_rast_error(dberr);
    }
    db_flags = flags & RAST_DB_RDONLY ? DB_RDONLY : DB_CREATE;
    if (lorder != 0) {
        db->set_lorder(db, lorder);
    }
    dberr = db->open(db, NULL, filename, NULL, DB_BTREE, db_flags, 0666);
    if (dberr != 0) {
        db->close(db, 0);
        return db_error_to_rast_error(dberr);
    }
    *dbp = db;
    return RAST_OK;
}

rast_error_t *
rast_text_index_open(rast_text_index_t **index,
                     const char *filename, int flags,
                     rast_encoding_module_t *encoding_module, DB_ENV *db_env,
                     int lorder, rast_size_t pos_block_size, apr_pool_t *pool)
{
    apr_pool_t *sub_pool;
    rast_error_t *error;
    DB *ngram_db, *rare_ngram_db;
    FILE *pos_file;
    char *pos_file_path;

    apr_pool_create(&sub_pool, pool);
    error = open_db(&ngram_db, filename, ".ngm", flags, db_env, lorder,
                    sub_pool);
    if (error != RAST_OK) {
        apr_pool_destroy(sub_pool);
        return error;
    }
    error = open_db(&rare_ngram_db, filename, ".rng", flags, db_env, lorder,
                    sub_pool);
    if (error != RAST_OK) {
        ngram_db->close(ngram_db, 0);
        apr_pool_destroy(sub_pool);
        return error;
    }
    pos_file_path = apr_pstrcat(sub_pool, filename, ".pos", NULL);
    if (flags & RAST_DB_RDONLY) {
        pos_file = fopen(pos_file_path, "r");
    }
    else {
        pos_file = fopen(pos_file_path, "r+");
        if (pos_file == NULL && errno == ENOENT) {
            pos_file = fopen(pos_file_path, "w+");
        }
    }
    if (pos_file == NULL) {
        rare_ngram_db->close(rare_ngram_db, 0);
        ngram_db->close(ngram_db, 0);
        apr_pool_destroy(sub_pool);
        return os_error_to_rast_error(errno);
    }
    *index = apr_palloc(pool, sizeof(rast_text_index_t));
    (*index)->pool = pool;
    (*index)->flags = flags;
    (*index)->encoding_module = encoding_module;
    (*index)->ngram_db = ngram_db;
    (*index)->rare_ngram_db = rare_ngram_db;
    (*index)->pos_file = pos_file;
    (*index)->pos_block_size = pos_block_size;
    apr_pool_create(&(*index)->cache_pool, pool);
    (*index)->cache = apr_hash_make((*index)->cache_pool);
    (*index)->free_list_name = apr_pstrcat(pool, filename, ".pfl", NULL);
    (*index)->free_list = free_list_create(pool);
    (*index)->is_native = 1;
    if ((error = load_free_list(*index)) != RAST_OK) {
        ngram_db->close(ngram_db, 0);
        rare_ngram_db->close(rare_ngram_db, 0);
        fclose(pos_file);
        apr_pool_destroy(sub_pool);
        return error;
    }
    (*index)->is_native = 1;
    apr_pool_destroy(sub_pool);
    return RAST_OK;
}

rast_error_t *
rast_text_index_close(rast_text_index_t *index)
{
    rast_error_t *error = RAST_OK;

    if (!(index->flags & RAST_DB_RDONLY)) {
        error = rast_text_index_sync(index);
    }
    index->ngram_db->close(index->ngram_db, 0);
    index->rare_ngram_db->close(index->rare_ngram_db, 0);
    fclose(index->pos_file);
    return error;
}

static int
number_length(int n)
{
    int len = 0;

    if (n == 0) {
        return 1;
    }
    while (n > 0) {
        n = n / 0x80;
        len++;
    }
    return len;
}

static int
pack_number(char *s, int n)
{
    char *p = s;
    div_t d;

    if (n == 0) {
        *p = 0;
        return 1;
    }
    while (n > 0) {
        d = div(n, 0x80);
        n = d.quot;
        if (n > 0) {
            *p = d.rem | 0x80;
        }
        else {
            *p = d.rem;
        }
        p++;
    }
    return p - s;
}

static inline int
unpack_number(const char *s, int len, int *np)
{
    const char *p = s, *pend = s + len;
    int base = 1, n = 0;

    while (p < pend) {
        if (*p & 0x80) {
            n += base * (*p & 0x7F);
            base *= 0x80;
            p++;
        }
        else {
            n += *p * base;
            p++;
            break;
        }
    }
    *np = n;
    return p - s;
}

rast_error_t *
rast_text_index_register(rast_text_index_t *index,
                         rast_doc_id_t doc_id,
                         rast_text_indexer_t **indexer, apr_pool_t *pool)
{
    if (index->flags & RAST_DB_RDONLY) {
        return rast_error(RAST_ERROR_BAD_DB, "can't register to read-only db");
    }

    *indexer = (rast_text_indexer_t *) apr_palloc(pool,
                                                  sizeof(rast_text_indexer_t));
    (*indexer)->pool = pool;
    (*indexer)->index = index;
    (*indexer)->doc_id = doc_id;
    (*indexer)->base_pos = 0;
    (*indexer)->position_table = apr_hash_make(pool);
    return RAST_OK;
}

rast_error_t *
rast_text_indexer_add(rast_text_indexer_t *indexer,
                      const char *s, int nbytes, rast_size_t *registered_chars)
{
    rast_text_index_t *index = indexer->index;
    apr_hash_t *position_table = indexer->position_table;
    rast_tokenizer_t *tokenizer;
    rast_token_t token = { 0 };
    rast_error_t *error;

    tokenizer = rast_register_tokenizer_create(indexer->pool,
                                               index->encoding_module,
                                               s, nbytes);
    while (!rast_register_tokenizer_is_done(tokenizer)) {
        pos_list_head_t *positions;
        pos_list_entry_t *entry;

        error = rast_register_tokenizer_get_current(tokenizer, &token);
        if (error != RAST_OK) {
            return error;
        }
        positions = apr_hash_get(position_table, token.ptr, token.nbytes);
        if (positions == NULL) {
            positions = (pos_list_head_t *)
                apr_palloc(index->cache_pool, sizeof(pos_list_head_t));
            APR_RING_INIT(positions, pos_list_entry_t, link);
            apr_hash_set(position_table, token.ptr, token.nbytes, positions);
        }
        entry =
            (pos_list_entry_t *) apr_palloc(index->cache_pool,
                                            sizeof(pos_list_entry_t));
        entry->pos = indexer->base_pos + token.pos;
        APR_RING_INSERT_TAIL(positions, entry, pos_list_entry_t, link);
        /* TODO: need test */
        error = rast_register_tokenizer_next(tokenizer);
        if (error != RAST_OK) {
            return error;
        }
    }
    indexer->base_pos += nbytes;
    if (registered_chars != NULL) {
        *registered_chars = token.pos + token.nchars;
    }
    return RAST_OK;
}

rast_error_t *
rast_text_indexer_commit(rast_text_indexer_t *indexer)
{
    apr_hash_index_t *hi;
    rast_text_index_t *index = indexer->index;
    apr_hash_t *position_table = indexer->position_table;

    for (hi = apr_hash_first(indexer->pool, position_table); hi;
         hi = apr_hash_next(hi)) {
        const void *key;
        apr_ssize_t klen;
        void *val;
        cache_list_head_t *entries;
        cache_list_entry_t *entry;

        apr_hash_this(hi, &key, &klen, &val);
        entries = (cache_list_head_t *) apr_hash_get(index->cache, key, klen);
        if (entries == NULL) {
            entries = (cache_list_head_t *)
                apr_palloc(index->cache_pool, sizeof(cache_list_head_t));
            APR_RING_INIT(entries, cache_list_entry_t, link);
            apr_hash_set(index->cache,
                         apr_pmemdup(index->cache_pool, key, klen), klen,
                         entries);
        }
        entry = (cache_list_entry_t *)
            apr_palloc(index->cache_pool, sizeof(cache_list_entry_t));
        entry->doc_id = indexer->doc_id;
        entry->positions = (pos_list_head_t *) val;
        APR_RING_INSERT_TAIL(entries, entry, cache_list_entry_t, link);
    }
    return RAST_OK;
}

static int
position_bytes(pos_list_head_t *positions)
{
    int len = 0;
    pos_list_entry_t *entry;

    for (entry = APR_RING_FIRST(positions);
         entry != APR_RING_SENTINEL(positions, pos_list_entry_t, link);
         entry = APR_RING_NEXT(entry, link)) {
        len += number_length(entry->pos);
    }
    return len;
}

static int
pack_position_data(char *buf, rast_doc_id_t doc_id, int bytes,
                   pos_list_head_t *positions)
{
    char *p = buf;
    pos_list_entry_t *entry;

    p += pack_number(p, doc_id);
    p += pack_number(p, bytes);
    for (entry = APR_RING_FIRST(positions);
         entry != APR_RING_SENTINEL(positions, pos_list_entry_t, link);
         entry = APR_RING_NEXT(entry, link)) {
        p += pack_number(p, entry->pos);
    }
    return p - buf;
}

static rast_error_t *
db_put(DB *db, const char *key, apr_ssize_t key_len,
       void *value, int value_len)
{
    DBT db_key = { 0 }, db_value = { 0 };
    int dberr;

    db_key.data = (char *) key;
    db_key.size = key_len;
    db_value.data = (char *) value;
    db_value.size = value_len;
    dberr = db->put(db, NULL, &db_key, &db_value, 0);
    return db_error_to_rast_error(dberr);
}

static int
pack_entries(apr_pool_t *pool, cache_list_head_t *entries, char **s,
             rast_doc_id_t *num_docs)
{
    int len = 0, i;
    char *p;
    cache_list_entry_t *entry;
    apr_array_header_t *nbytes;
    apr_pool_t *sub_pool;

    apr_pool_create(&sub_pool, pool);
    nbytes = apr_array_make(sub_pool, 1, sizeof(rast_size_t));
    for (entry = APR_RING_FIRST(entries);
         entry != APR_RING_SENTINEL(entries, cache_list_entry_t, link);
         entry = APR_RING_NEXT(entry, link)) {
        rast_size_t bytes;
        len += number_length(entry->doc_id);
        bytes = position_bytes(entry->positions);
        len += number_length(bytes);
        len += bytes;
        *(rast_size_t *) apr_array_push(nbytes) = bytes;
    }
    *s = (char *) apr_palloc(pool, len);
    p = *s;
    i = 0;
    for (entry = APR_RING_FIRST(entries);
         entry != APR_RING_SENTINEL(entries, cache_list_entry_t, link);
         entry = APR_RING_NEXT(entry, link)) {
        p += pack_position_data(p, entry->doc_id,
                                ((rast_size_t *) nbytes->elts)[i],
                                entry->positions);
        i++;
    }
    *num_docs = i;
    apr_pool_destroy(sub_pool);
    return len;
}

static rast_error_t *
write_new_ngram_to_rare_ngram_db(DB *rare_ngram_db,
                                 const char *key, apr_ssize_t key_len,
                                 void *data, rast_size_t data_len)
{
    return db_put(rare_ngram_db, key, key_len, data, data_len);
}

static rast_error_t *
allocate_area(rast_text_index_t *index, rast_size_t block_count,
              rast_size_t *block_no)
{
    off_t pos;

    *block_no = free_list_search(index->free_list, block_count);
    if (*block_no == FREE_AREA_NOT_FOUND) {
        if (fseeko(index->pos_file, 0, SEEK_END) == -1) {
            return os_error_to_rast_error(errno);
        }
        pos = ftello(index->pos_file);
        if (pos == -1) {
            return os_error_to_rast_error(errno);
        }
        *block_no = pos / index->pos_block_size;
        if (ftruncate(fileno(index->pos_file),
                      pos + block_count * index->pos_block_size) == -1) {
            return os_error_to_rast_error(errno);
        }
    }
    else {
        if (fseeko(index->pos_file,
                   *block_no * index->pos_block_size, SEEK_SET) == -1) {
            return os_error_to_rast_error(errno);
        }
    }
    return RAST_OK;
}

static rast_error_t *
write_ngram_to_ngram_db(apr_pool_t *pool, rast_text_index_t *index,
                        const char *key, apr_ssize_t key_len,
                        rast_doc_id_t num_docs,
                        const char *new_data, rast_size_t new_data_len,
                        const char *old_data, rast_size_t old_data_len)
{
    rast_size_t block_no, block_count;
    rast_uint_t buf[4];
    rast_size_t new_nbytes;
    rast_error_t *error;

    if (old_data == NULL) {
        new_nbytes = new_data_len;
    }
    else {
        new_nbytes = old_data_len + new_data_len;
    }

    block_count = new_nbytes / index->pos_block_size + 1;
    error = allocate_area(index, block_count, &block_no);
    if (error != RAST_OK) {
        return error;
    }
    if (old_data != NULL) {
        if (fwrite(old_data, 1, old_data_len, index->pos_file) < old_data_len) {
            return os_error_to_rast_error(errno);
        }
    }
    if (fwrite(new_data, 1, new_data_len, index->pos_file) < new_data_len) {
        return os_error_to_rast_error(errno);
    }
    buf[0] = rast_fix_byte_order(block_no, index->is_native);
    buf[1] = rast_fix_byte_order(block_count, index->is_native);
    buf[2] = rast_fix_byte_order(new_nbytes, index->is_native);
    buf[3] = rast_fix_byte_order(num_docs, index->is_native);
    return db_put(index->ngram_db, key, key_len, buf, sizeof(buf));
}

static rast_size_t
calc_new_block_count(rast_size_t nbytes, rast_size_t block_count,
                     rast_size_t pos_block_size)
{
    int needed_block_count, new_block_count;

    needed_block_count = nbytes / pos_block_size + 1;
    new_block_count = block_count * 2;
    if (new_block_count < needed_block_count) {
        new_block_count = needed_block_count;
    }
    return new_block_count;
}

static rast_error_t *
write_dup_ngram(apr_pool_t *pool, rast_text_index_t *index,
                const char *ngram, apr_ssize_t ngram_len,
                rast_doc_id_t num_docs,
                const char *db_value, int db_value_len,
                const char *data, rast_size_t data_len)
{
    rast_size_t block_no, block_count, nbytes;
    rast_size_t new_block_no, new_block_count, new_nbytes;
    rast_uint_t buf[4];
    rast_size_t *p;
    rast_doc_id_t new_num_docs;

    p = (rast_size_t *) db_value;
    block_no = rast_fix_byte_order(p[0], index->is_native);
    block_count = rast_fix_byte_order(p[1], index->is_native);
    nbytes = rast_fix_byte_order(p[2], index->is_native);
    new_num_docs = num_docs + rast_fix_byte_order(p[3], index->is_native);

    new_nbytes = nbytes + data_len;
    if (new_nbytes <= block_count * index->pos_block_size) {
        if (fseeko(index->pos_file,
                   block_no * index->pos_block_size + nbytes,
                   SEEK_SET) == -1) {
            return os_error_to_rast_error(errno);
        }
        if (fwrite(data, 1, data_len, index->pos_file) < data_len) {
            return os_error_to_rast_error(errno);
        }
        new_block_no = block_no;
        new_block_count = block_count;
    }
    else {
        rast_error_t *error;
        char *old_data;

        if (fseeko(index->pos_file,
                   block_no * index->pos_block_size, SEEK_SET) == -1) {
            return os_error_to_rast_error(errno);
        }
        old_data = (char *) apr_palloc(pool, nbytes);
        if ((fread(old_data, 1, nbytes, index->pos_file)) < nbytes) {
            return os_error_to_rast_error(errno);
        }
        free_list_add(index->free_list, block_no, block_count);
        new_block_count = calc_new_block_count(new_nbytes, block_count,
                                               index->pos_block_size);
        error = allocate_area(index, new_block_count, &new_block_no);
        if (error != RAST_OK) {
            return error;
        }
        if (fwrite(old_data, 1, nbytes, index->pos_file) < nbytes) {
            return os_error_to_rast_error(errno);
        }
        if (fwrite(data, 1, data_len, index->pos_file) < data_len) {
            return os_error_to_rast_error(errno);
        }
    }

    buf[0] = rast_fix_byte_order(new_block_no, index->is_native);
    buf[1] = rast_fix_byte_order(new_block_count, index->is_native);
    buf[2] = rast_fix_byte_order(new_nbytes, index->is_native);
    buf[3] = rast_fix_byte_order(new_num_docs, index->is_native);
    return db_put(index->ngram_db, ngram, ngram_len,
                  buf, sizeof(rast_size_t) * 4);
}

rast_error_t *
rast_text_index_sync(rast_text_index_t *index)
{
    apr_pool_t *pool, *sub_pool;
    apr_hash_index_t *hi;
    int dberr;
    rast_error_t *error;

    if (index->flags & RAST_DB_RDONLY) {
        return rast_error(RAST_ERROR_BAD_DB, "can't sync read-only db");
    }
    apr_pool_create(&pool, index->pool);
    apr_pool_create(&sub_pool, pool);
    for (hi = apr_hash_first(pool, index->cache); hi; hi = apr_hash_next(hi)) {
        const void *ngram;
        apr_ssize_t ngram_len;
        void *value;
        cache_list_head_t *entries;
        cache_list_entry_t *entry;
        DBT db_key = { 0 }, db_value = { 0 };
        int dberr, data_len;
        char *data;
        rast_doc_id_t num_docs;
        rast_uint_t buf[4];
        char rbuf[RARE_NGRAM_DB_MAX_VALUE_SIZE];

        apr_hash_this(hi, &ngram, &ngram_len, &value);
        entries = (cache_list_head_t *) value;
        data_len = pack_entries(sub_pool, entries, &data, &num_docs);
        db_key.data = (void *) ngram;
        db_key.size = ngram_len;
        db_value.data = &buf;
        db_value.ulen = sizeof(buf);
        db_value.flags = DB_DBT_USERMEM;
        dberr = index->ngram_db->get(index->ngram_db, NULL,
                                     &db_key, &db_value, 0);
        switch (dberr) {
        case 0:
            error = write_dup_ngram(sub_pool, index, ngram, ngram_len,
                                    num_docs,
                                    db_value.data, db_value.size,
                                    data, data_len);
            if (error != RAST_OK) {
                apr_pool_destroy(pool);
                apr_pool_clear(index->cache_pool);
                index->cache = apr_hash_make(index->cache_pool);
                return error;
            }
            break;
        case DB_NOTFOUND:
            db_value.data = &rbuf;
            db_value.size = sizeof(rbuf);
            dberr = index->rare_ngram_db->get(index->rare_ngram_db, NULL,
                                              &db_key, &db_value, 0);
            switch (dberr) {
            case 0:
                error = write_ngram_to_ngram_db(sub_pool, index,
                                                ngram, ngram_len, num_docs + 1,
                                                data, data_len,
                                                db_value.data, db_value.size);
                if (error == RAST_OK) {
                    dberr = index->rare_ngram_db->del(index->rare_ngram_db,
                                                      NULL, &db_key, 0);
                    error = db_error_to_rast_error(dberr);
                }
                if (error != RAST_OK) {
                    apr_pool_destroy(pool);
                    apr_pool_clear(index->cache_pool);
                    index->cache = apr_hash_make(index->cache_pool);
                    return error;
                }
                break;
            case DB_NOTFOUND:
                entry = APR_RING_FIRST(entries);
                if (APR_RING_FIRST(entries) == APR_RING_LAST(entries) &&
                    (APR_RING_FIRST(entry->positions) ==
                     APR_RING_LAST(entry->positions))) {
                    DB *rare_ngram_db = index->rare_ngram_db;

                    error = write_new_ngram_to_rare_ngram_db(rare_ngram_db,
                                                             ngram, ngram_len,
                                                             data, data_len);
                }
                else {
                    error = write_ngram_to_ngram_db(sub_pool, index,
                                                    ngram, ngram_len, num_docs,
                                                    data, data_len, NULL, 0);
                }
                if (error != RAST_OK) {
                    apr_pool_destroy(pool);
                    apr_pool_clear(index->cache_pool);
                    index->cache = apr_hash_make(index->cache_pool);
                    return error;
                }
                break;
            default:
                apr_pool_destroy(pool);
                apr_pool_clear(index->cache_pool);
                index->cache = apr_hash_make(index->cache_pool);
                return db_error_to_rast_error(dberr);
            }
            break;
        default:
            apr_pool_destroy(pool);
            apr_pool_clear(index->cache_pool);
            index->cache = apr_hash_make(index->cache_pool);
            return db_error_to_rast_error(dberr);
        }
        apr_pool_clear(sub_pool);
    }
    apr_pool_destroy(pool);
    if ((dberr = index->ngram_db->sync(index->ngram_db, 0)) != 0) {
        return db_error_to_rast_error(dberr);
    }
    if ((dberr = index->rare_ngram_db->sync(index->rare_ngram_db, 0)) != 0) {
        return db_error_to_rast_error(dberr);
    }
    if (fflush(index->pos_file) == EOF) {
        return os_error_to_rast_error(errno);
    }
    if ((error = save_free_list(index)) != RAST_OK) {
        return error;
    }
    apr_pool_clear(index->cache_pool);
    index->cache = apr_hash_make(index->cache_pool);
    return RAST_OK;
}

typedef struct {
    rast_doc_id_t doc_id;
    const char *pos_ptr;
    int pos_bytes;
    int bytes;
} doc_t;

static inline rast_error_t *
parse_doc(const char *s, const char *s_end, doc_t *result)
{
    const char *p = s;
    int n;

    n = unpack_number(p, s_end - p, &result->doc_id);
    if (n == 0) {
        return rast_error(RAST_ERROR_GENERAL, "no doc_id found");
    }
    p += n;
    n = unpack_number(p, s_end - p, &result->pos_bytes);
    if (n == 0) {
        return rast_error(RAST_ERROR_GENERAL, "no pos_bytes found");
    }
    p += n;
    result->pos_ptr = p;
    p += result->pos_bytes;
    result->bytes = p - s;
    return RAST_OK;
}

typedef struct pos_cursor_t pos_cursor_t;

typedef struct {
    int (*get_current)(pos_cursor_t *base);
    void (*next)(pos_cursor_t *base);
    int (*is_done)(pos_cursor_t *base);
} pos_cursor_type_t;

struct pos_cursor_t {
    pos_cursor_type_t *type;
    rast_pos_t offset;
};

typedef struct {
    pos_cursor_t base;
    int n;
    int next_offset;
    const char *ptr;
    const char *end;
} single_pos_cursor_t;

static int
single_pos_cursor_get_current(pos_cursor_t *base)
{
    single_pos_cursor_t *pos = (single_pos_cursor_t *) base;

    if (pos->next_offset == 0) {
        pos->next_offset =
            unpack_number(pos->ptr, pos->end - pos->ptr, &pos->n);
    }
    return pos->n;
}

static void
single_pos_cursor_next(pos_cursor_t *base)
{
    single_pos_cursor_t *pos = (single_pos_cursor_t *) base;

    if (pos->next_offset == 0) {
        pos->next_offset =
            unpack_number(pos->ptr, pos->end - pos->ptr, &pos->n);
    }
    pos->ptr += pos->next_offset;
    pos->next_offset = 0;
}

static int
single_pos_cursor_is_done(pos_cursor_t *base)
{
    single_pos_cursor_t *pos = (single_pos_cursor_t *) base;

    return pos->ptr >= pos->end;
}

static pos_cursor_type_t single_pos_cursor_type = {
    single_pos_cursor_get_current,
    single_pos_cursor_next,
    single_pos_cursor_is_done,
};

typedef struct {
    pos_cursor_t base;
    pos_cursor_t **cursors;
    int num_cursors;
    pos_cursor_t *min_cursor;
} multi_pos_cursor_t;

static rast_pos_t
get_min_pos(multi_pos_cursor_t *cursor)
{
    rast_pos_t n, min_pos;
    pos_cursor_t *c;
    int i;

    min_pos = RAST_POS_MAX;
    for (i = 0; i < cursor->num_cursors; i++) {
        c = *(cursor->cursors + i);
        if (!c->type->is_done(c)) {
            n = c->type->get_current(c);
            if (n < min_pos) {
                min_pos = n;
                cursor->min_cursor = c;
            }
        }
    }
    return min_pos;
}

static int
multi_pos_cursor_get_current(pos_cursor_t *base)
{
    multi_pos_cursor_t *cursor = (multi_pos_cursor_t *) base;

    if (cursor->min_cursor == NULL) {
        return get_min_pos(cursor);
    }
    return cursor->min_cursor->type->get_current(cursor->min_cursor);
}

static void
multi_pos_cursor_next(pos_cursor_t *base)
{
    multi_pos_cursor_t *cursor = (multi_pos_cursor_t *) base;

    if (cursor->min_cursor == NULL) {
        get_min_pos(cursor);
    }
    cursor->min_cursor->type->next(cursor->min_cursor);
    cursor->min_cursor = NULL;
}

static int
multi_pos_cursor_is_done(pos_cursor_t *base)
{
    multi_pos_cursor_t *cursor = (multi_pos_cursor_t *) base;
    pos_cursor_t *c;
    int i;

    for (i = 0; i < cursor->num_cursors; i++) {
        c = *(cursor->cursors + i);
        if (!c->type->is_done(c)) {
            return 0;
        }
    }
    return 1;
}

static pos_cursor_type_t multi_pos_cursor_type = {
    multi_pos_cursor_get_current,
    multi_pos_cursor_next,
    multi_pos_cursor_is_done,
};

typedef struct ngram_t ngram_t;

typedef struct {
    rast_error_t *(*get_current_doc_id)(ngram_t *ngram, rast_doc_id_t *doc_id);
    rast_error_t *(*next_doc)(ngram_t *ngram);
    int (*is_done)(ngram_t *ngram);
    rast_error_t *(*pos_cursor_create)(ngram_t *base, apr_pool_t *pool,
                                       pos_cursor_t **base_pos_cursor);
} ngram_type_t;

struct ngram_t {
    ngram_type_t *type;
    rast_pos_t offset;
    rast_doc_id_t num_docs;
    const char *ptr;
    int nbytes;
    int nchars;
    APR_RING_ENTRY(ngram_t) link;
};

APR_RING_HEAD(ngram_head_t, ngram_t);
typedef struct ngram_head_t ngram_head_t;

typedef struct {
    ngram_t base;
    apr_pool_t *pool;
    const char *doc_ptr;
    const char *doc_end;
    doc_t current;
    int parsed;
} single_ngram_t;

static rast_error_t *
single_ngram_get_current_doc_id(ngram_t *base, rast_doc_id_t *doc_id)
{
    single_ngram_t *ngram = (single_ngram_t *) base;

    if (!ngram->parsed) {
        rast_error_t *error = parse_doc(ngram->doc_ptr, ngram->doc_end,
                                        &ngram->current);
        if (error != RAST_OK) {
            return error;
        }
        ngram->parsed = 1;
    }
    *doc_id = ngram->current.doc_id;
    return RAST_OK;
}

static rast_error_t *
single_ngram_next_doc(ngram_t *base)
{
    single_ngram_t *ngram = (single_ngram_t *) base;
    rast_error_t *error;

    if (!ngram->parsed) {
        error = parse_doc(ngram->doc_ptr, ngram->doc_end, &ngram->current);
        if (error != RAST_OK) {
            return error;
        }
    }
    ngram->doc_ptr += ngram->current.bytes;
    ngram->parsed = 0;
    return RAST_OK;
}

static int
single_ngram_is_done(ngram_t *base)
{
    single_ngram_t *ngram = (single_ngram_t *) base;
    return ngram->doc_ptr >= ngram->doc_end;
}

static rast_error_t *
single_ngram_pos_cursor_create(ngram_t *base, apr_pool_t *pool,
                               pos_cursor_t **base_pos_cursor)
{
    single_ngram_t *ngram = (single_ngram_t *) base;
    rast_doc_id_t doc_id;
    single_pos_cursor_t *pos_cursor;

    single_ngram_get_current_doc_id(base, &doc_id);
    pos_cursor = (single_pos_cursor_t *)
        apr_palloc(pool, sizeof(single_pos_cursor_t));
    pos_cursor->base.type = &single_pos_cursor_type;
    pos_cursor->base.offset = base->offset;
    pos_cursor->n = 0;
    pos_cursor->next_offset = 0;
    pos_cursor->ptr = ngram->current.pos_ptr;
    pos_cursor->end = ngram->current.pos_ptr + ngram->current.pos_bytes;
    *base_pos_cursor = (pos_cursor_t *) pos_cursor;
    return RAST_OK;
}

static ngram_type_t single_ngram_type = {
    single_ngram_get_current_doc_id,
    single_ngram_next_doc,
    single_ngram_is_done,
    single_ngram_pos_cursor_create,
};

static rast_error_t *
get_packed_positions_from_pos_file(apr_pool_t *pool,
                                   rast_text_index_t *index,
                                   rast_size_t *db_values,
                                   const char **doc_ptr, rast_size_t *doc_len,
                                   rast_doc_id_t *num_docs)
{
    rast_size_t block_no, block_count;
    off_t offset;

    block_no = rast_fix_byte_order(db_values[0], index->is_native);
    block_count = rast_fix_byte_order(db_values[1], index->is_native);
    *doc_len = rast_fix_byte_order(db_values[2], index->is_native);
    *num_docs = rast_fix_byte_order(db_values[3], index->is_native);
    offset = block_no * index->pos_block_size;
    if (fseeko(index->pos_file, offset, SEEK_SET) == -1) {
        return os_error_to_rast_error(errno);
    }
    *doc_ptr = (const char *) apr_palloc(pool, *doc_len);
    if (fread((char *) *doc_ptr, 1, *doc_len, index->pos_file) < 1) {
        if (feof(index->pos_file)) {
            return rast_error(RAST_ERROR_EOF, NULL);
        }
        return os_error_to_rast_error(errno);
    }
    return RAST_OK;
}

static rast_error_t *
single_ngram_get_packed_positions(apr_pool_t *pool,
                                  rast_text_index_t *index,
                                  rast_token_t *token,
                                  const char **doc_ptr, rast_size_t *doc_len,
                                  rast_doc_id_t *num_docs)
{
    DBT db_key = { 0 }, db_value = { 0 };
    rast_error_t *error;
    int dberr;
    rast_uint_t buf[4];
    char rbuf[RARE_NGRAM_DB_MAX_VALUE_SIZE];
    cache_list_head_t *cache_entries;

    db_key.data = (char *) token->ptr;
    db_key.size = token->nbytes;
    db_value.data = &buf;
    db_value.ulen = sizeof(buf);
    db_value.flags = DB_DBT_USERMEM;
    dberr = index->ngram_db->get(index->ngram_db, NULL, &db_key, &db_value, 0);
    switch (dberr) {
    case 0:
        error = get_packed_positions_from_pos_file(pool, index, buf,
                                                   doc_ptr, doc_len, num_docs);
        if (error != RAST_OK) {
            return error;
        }
        break;
    case DB_NOTFOUND:
        db_value.data = &rbuf;
        db_value.ulen = sizeof(rbuf);
        dberr = index->rare_ngram_db->get(index->rare_ngram_db, NULL,
                                          &db_key, &db_value, 0);
        switch (dberr) {
        case 0:
            *doc_ptr = (char *) apr_pmemdup(pool,
                                            db_value.data, db_value.size);
            *doc_len = db_value.size;
            *num_docs = 0;
            break;
        case DB_NOTFOUND:
            /* not found ngram in database */
            *doc_ptr = NULL;
            *doc_len = 0;
            *num_docs = 0;
            break;
        default:
            return db_error_to_rast_error(dberr);
        }
        break;
    default:
        return db_error_to_rast_error(dberr);
    }

    cache_entries = (cache_list_head_t *)
        apr_hash_get(index->cache, token->ptr, token->nbytes);
    if (cache_entries != NULL) {
        char *s, *new_doc_ptr;
        int num_docs, nbytes, new_doc_len;

        nbytes = pack_entries(pool, cache_entries, &s, &num_docs);
        new_doc_len = *doc_len + nbytes;
        new_doc_ptr = (char *) apr_palloc(pool, new_doc_len);
        memcpy(new_doc_ptr, *doc_ptr, *doc_len);
        memcpy(new_doc_ptr + *doc_len, s, nbytes);
        *doc_ptr = new_doc_ptr;
        *doc_len = new_doc_len;
    }

    return RAST_OK;
}

static rast_error_t *
single_ngram_allocate(apr_pool_t *pool, rast_text_index_t *index,
                      rast_token_t *token, rast_doc_id_t num_docs,
                      const char *doc_ptr, rast_size_t doc_len, ngram_t **base)
{
    single_ngram_t *ngram;

    ngram = (single_ngram_t *) apr_palloc(pool, sizeof(single_ngram_t));
    ngram->base.type = &single_ngram_type;
    ngram->base.offset = token->pos;
    ngram->base.num_docs = num_docs;
    ngram->base.ptr = token->ptr;
    ngram->base.nbytes = token->nbytes;
    ngram->base.nchars = token->nchars;
    ngram->pool = pool;
    ngram->doc_ptr = doc_ptr;
    ngram->doc_end = ngram->doc_ptr + doc_len;
    ngram->parsed = 0;
    *base = (ngram_t *) ngram;
    return RAST_OK;
}

static rast_error_t *
single_ngram_create(apr_pool_t *pool, rast_text_index_t *index,
                    rast_token_t *token, ngram_t **base)
{
    const char *doc_ptr;
    rast_size_t doc_len;
    rast_error_t *error;
    rast_doc_id_t num_docs;

    error = single_ngram_get_packed_positions(pool, index, token, &doc_ptr,
                                              &doc_len, &num_docs);
    if (error != RAST_OK) {
        return error;
    }
    if (doc_ptr == NULL) {
        *base = NULL;
        return RAST_OK;
    }
    return single_ngram_allocate(pool, index, token, num_docs,
                                 doc_ptr, doc_len, base);
}

typedef struct {
    ngram_t base;
    ngram_head_t *ngrams;
    rast_doc_id_t min_doc_id;
    pos_cursor_t **cursors_buffer;
} multi_ngram_t;

static rast_error_t *
multi_ngram_get_current_doc_id(ngram_t *base, rast_doc_id_t *doc_id)
{
    multi_ngram_t *ngram = (multi_ngram_t *) base;
    ngram_t *n;
    rast_doc_id_t id;
    rast_error_t *error;

    if (ngram->min_doc_id == (rast_doc_id_t) -1) {
      for (n = APR_RING_FIRST(ngram->ngrams);
           n != APR_RING_SENTINEL(ngram->ngrams, ngram_t, link);
           n = APR_RING_NEXT(n, link)) {
            if (!n->type->is_done(n)) {
                error = n->type->get_current_doc_id(n, &id);
                if (error != RAST_OK) {
                    return error;
                }
                if (id < ngram->min_doc_id) {
                    ngram->min_doc_id = id;
                }
            }
        }
    }
    *doc_id = ngram->min_doc_id;
    return RAST_OK;
}

static rast_error_t *
multi_ngram_next_doc(ngram_t *base)
{
    multi_ngram_t *ngram = (multi_ngram_t *) base;
    ngram_t *n;
    rast_doc_id_t doc_id;
    rast_error_t *error;

    if (ngram->min_doc_id == (rast_doc_id_t) -1) {
        return rast_error(RAST_ERROR_GENERAL,
                          "must be called after get_current_doc_id.");
    }
    for (n = APR_RING_FIRST(ngram->ngrams);
         n != APR_RING_SENTINEL(ngram->ngrams, ngram_t, link);
         n = APR_RING_NEXT(n, link)) {
        if (!n->type->is_done(n)) {
            error = n->type->get_current_doc_id(n, &doc_id);
            if (error != RAST_OK) {
                return error;
            }
            if (ngram->min_doc_id == doc_id) {
                error = n->type->next_doc(n);
                if (error != RAST_OK) {
                    return error;
                }
            }
        }
    }
    ngram->min_doc_id = (rast_doc_id_t) -1;
    return RAST_OK;
}

static int
multi_ngram_is_done(ngram_t *base)
{
    multi_ngram_t *ngram = (multi_ngram_t *) base;
    ngram_t *n;

    for (n = APR_RING_FIRST(ngram->ngrams);
         n != APR_RING_SENTINEL(ngram->ngrams, ngram_t, link);
         n = APR_RING_NEXT(n, link)) {
        if (!n->type->is_done(n)) {
            return 0;
        }
    }
    return 1;
}

static rast_error_t *
multi_ngram_pos_cursor_create(ngram_t *base, apr_pool_t *pool,
                              pos_cursor_t **base_pos_cursor)
{
    multi_ngram_t *ngram = (multi_ngram_t *) base;
    ngram_t *n;
    rast_doc_id_t doc_id;
    multi_pos_cursor_t *pos_cursor;
    int num_cursors;
    pos_cursor_t *c;
    rast_error_t *error;

    num_cursors = 0;
    for (n = APR_RING_FIRST(ngram->ngrams);
         n != APR_RING_SENTINEL(ngram->ngrams, ngram_t, link);
         n = APR_RING_NEXT(n, link)) {
        if (!n->type->is_done(n)) {
            error = n->type->get_current_doc_id(n, &doc_id);
            if (error != RAST_OK) {
                return error;
            }
            if (ngram->min_doc_id == doc_id) {
                error = n->type->pos_cursor_create(n, pool, &c);
                if (error != RAST_OK) {
                    return error;
                }
                *(ngram->cursors_buffer + num_cursors) = c;
                num_cursors++;
            }
        }
    }
    pos_cursor =
        (multi_pos_cursor_t *) apr_palloc(pool, sizeof(multi_pos_cursor_t));
    pos_cursor->base.type = &multi_pos_cursor_type;
    pos_cursor->base.offset = base->offset;
    pos_cursor->cursors = ngram->cursors_buffer;
    pos_cursor->num_cursors = num_cursors;
    pos_cursor->min_cursor = NULL;
    *base_pos_cursor = (pos_cursor_t *) pos_cursor;
    return RAST_OK;
}

static ngram_type_t multi_ngram_type = {
    multi_ngram_get_current_doc_id,
    multi_ngram_next_doc,
    multi_ngram_is_done,
    multi_ngram_pos_cursor_create,
};

static rast_error_t *
multi_ngram_get_packed_positions(apr_pool_t *pool,
                                 rast_text_index_t *index,
                                 rast_token_t *token, rast_doc_id_t *num_docs,
                                 ngram_head_t *ngrams, int *num_ngrams)
{
    DBC *cursor;
    DBT db_key = { 0 }, db_value = { 0 };
    const char *doc_ptr;
    rast_size_t doc_len;
    rast_doc_id_t child_num_docs;
    ngram_t *ngram;
    int dberr;
    rast_error_t *error;
    rast_token_t t;
    rast_uint_t buf[4];
    apr_hash_index_t *hi;
    apr_pool_t *sub_pool;

    dberr = index->ngram_db->cursor(index->ngram_db, NULL, &cursor, 0);
    if (dberr != 0) {
        return db_error_to_rast_error(dberr);
    }

    apr_pool_create(&sub_pool, pool);
    db_key.data = (char *) token->ptr;
    db_key.size = token->nbytes;
    db_value.data = &buf;
    db_value.ulen = sizeof(buf);
    db_value.flags = DB_DBT_USERMEM;
    dberr = cursor->c_get(cursor, &db_key, &db_value, DB_SET_RANGE);
    switch (dberr) {
    case 0:
        while (dberr != DB_NOTFOUND &&
               memcmp(token->ptr, db_key.data, token->nbytes) == 0) {
            error = get_packed_positions_from_pos_file(pool, index, buf,
                                                       &doc_ptr, &doc_len,
                                                       &child_num_docs);
            t.ptr = apr_pmemdup(pool, db_key.data, db_key.size);
            t.nbytes = db_key.size;
            t.nchars =
                token->nchars + rast_count_chars(index->encoding_module,
                                                 db_key.data + token->nbytes,
                                                 db_key.size - token->nbytes,
                                                 sub_pool);
            t.pos = token->pos;
            if (error != RAST_OK) {
                cursor->c_close(cursor);
                return error;
            }
            single_ngram_allocate(pool, index, &t, child_num_docs,
                                  doc_ptr, doc_len, &ngram);
            APR_RING_INSERT_TAIL(ngrams, ngram, ngram_t, link);
            (*num_docs) += child_num_docs;
            (*num_ngrams)++;
            dberr = cursor->c_get(cursor, &db_key, &db_value, DB_NEXT);
        }
        break;
    case DB_NOTFOUND:
        break;
    default:
        cursor->c_close(cursor);
        return db_error_to_rast_error(dberr);
    }
    cursor->c_close(cursor);

    for (hi = apr_hash_first(sub_pool, index->cache); hi;
         hi = apr_hash_next(hi)) {
        const void *key;
        apr_ssize_t klen;
        void *val;

        apr_hash_this(hi, &key, &klen, &val);
        if (memcmp(key, token->ptr, token->nbytes) == 0) {
            cache_list_head_t *cache_entries = (cache_list_head_t *) val;
            char *s;

            doc_len = pack_entries(pool, cache_entries, &s, &child_num_docs);
            doc_ptr = s;

            t.ptr = apr_pmemdup(pool, key, klen);
            t.nbytes = klen;
            t.nchars =
                token->nchars + rast_count_chars(index->encoding_module,
                                                 key + token->nbytes,
                                                 klen - token->nbytes,
                                                 sub_pool);
            t.pos = token->pos;
            single_ngram_allocate(pool, index, &t, child_num_docs,
                                  doc_ptr, doc_len, &ngram);
            APR_RING_INSERT_TAIL(ngrams, ngram, ngram_t, link);
            (*num_docs) += child_num_docs;
            (*num_ngrams)++;
        }
    }

    return RAST_OK;
}

static rast_error_t *
multi_ngram_get_rare_packed_positions(apr_pool_t *pool,
                                      rast_text_index_t *index,
                                      rast_token_t *token,
                                      rast_doc_id_t *num_docs,
                                      ngram_head_t *ngrams, int *num_ngrams)
{
    DBC *cursor;
    DBT db_key = { 0 }, db_value = { 0 };
    const char *doc_ptr;
    rast_size_t doc_len;
    ngram_t *ngram;
    int dberr;
    char buf[RARE_NGRAM_DB_MAX_VALUE_SIZE];

    dberr = index->rare_ngram_db->cursor(index->rare_ngram_db, NULL,
                                         &cursor, 0);
    if (dberr != 0) {
        return db_error_to_rast_error(dberr);
    }
    db_key.data = (char *) token->ptr;
    db_key.size = token->nbytes;
    db_value.data = &buf;
    db_value.ulen = sizeof(buf);
    db_value.flags = DB_DBT_USERMEM;
    dberr = cursor->c_get(cursor, &db_key, &db_value, DB_SET_RANGE);
    switch (dberr) {
    case 0:
        while (dberr != DB_NOTFOUND &&
               db_key.size >= token->nbytes &&
               memcmp(token->ptr, db_key.data, token->nbytes) == 0) {
            doc_ptr = (char *) apr_pmemdup(pool, db_value.data, db_value.size);
            doc_len = db_value.size;
            single_ngram_allocate(pool, index, token, 1, doc_ptr, doc_len,
                                  &ngram);
            APR_RING_INSERT_TAIL(ngrams, ngram, ngram_t, link);
            (*num_docs)++;
            (*num_ngrams)++;
            dberr = cursor->c_get(cursor, &db_key, &db_value, DB_NEXT);
        }
        break;
    case DB_NOTFOUND:
        break;
    default:
        cursor->c_close(cursor);
        return db_error_to_rast_error(dberr);
    }
    cursor->c_close(cursor);
    return RAST_OK;
}

static rast_error_t *
multi_ngram_create(apr_pool_t *pool, rast_text_index_t *index,
                   rast_token_t *token, ngram_t **base)
{
    rast_doc_id_t num_docs;
    multi_ngram_t *ngram;
    ngram_head_t *ngrams;
    int num_ngrams;
    rast_error_t *error;

    ngrams = (ngram_head_t *) apr_palloc(pool, sizeof(ngram_head_t));
    APR_RING_INIT(ngrams, ngram_t, link);
    num_docs = 0;
    num_ngrams = 0;
    error = multi_ngram_get_packed_positions(pool, index, token, &num_docs,
                                             ngrams, &num_ngrams);
    if (error != RAST_OK) {
        return error;
    }
    error = multi_ngram_get_rare_packed_positions(pool, index, token,
                                                  &num_docs,
                                                  ngrams, &num_ngrams);
    if (error != RAST_OK) {
        return error;
    }
    if (num_ngrams == 0) {
        *base = NULL;
        return RAST_OK;
    }
    ngram = (multi_ngram_t *) apr_palloc(pool, sizeof(multi_ngram_t));
    ngram->base.type = &multi_ngram_type;
    ngram->base.offset = token->pos;
    ngram->base.num_docs = num_docs;
    ngram->base.ptr = token->ptr;
    ngram->base.nbytes = token->nbytes;
    ngram->base.nchars = token->nchars;
    ngram->ngrams = ngrams;
    ngram->min_doc_id = (rast_doc_id_t) -1;
    ngram->cursors_buffer = (pos_cursor_t **)
        apr_palloc(pool, sizeof(pos_cursor_t *) * num_ngrams);
    *base = (ngram_t *) ngram;
    return RAST_OK;
}

static rast_error_t *
get_ngrams(apr_pool_t *pool, rast_text_index_t *index,
           const char *term, ngram_head_t **ngrams, int *num_ngrams)
{
    rast_tokenizer_t *tokenizer;
    rast_token_t token;
    ngram_t *ngram;
    rast_error_t *error;

    *ngrams = (ngram_head_t *) apr_palloc(pool, sizeof(ngram_head_t));
    APR_RING_INIT(*ngrams, ngram_t, link);
    *num_ngrams = 0;
    tokenizer = rast_search_tokenizer_create(pool, index->encoding_module,
                                             term, strlen(term));
    while (!rast_search_tokenizer_is_done(tokenizer)) {
        rast_search_tokenizer_get_current(tokenizer, &token);
        if (token.is_complete) {
            error = single_ngram_create(pool, index, &token, &ngram);
        }
        else {
            error = multi_ngram_create(pool, index, &token, &ngram);
        }
        if (error != RAST_OK) {
            return error;
        }
        if (ngram == NULL) {
            *ngrams = NULL;
            *num_ngrams = 0;
            return RAST_OK;
        }
        APR_RING_INSERT_TAIL(*ngrams, ngram, ngram_t, link);
        (*num_ngrams)++;

        rast_search_tokenizer_next(tokenizer);
    }
    return RAST_OK;
}

static void
optimize_ngrams(ngram_head_t *ngrams, int *num_ngrams)
{
    ngram_t *ngram, *next;
    rast_size_t next_offset, min_doc_nums;

    ngram = APR_RING_FIRST(ngrams);
    min_doc_nums = ngram->num_docs;
    while (ngram != APR_RING_SENTINEL(ngrams, ngram_t, link)) {
        if (ngram->num_docs < min_doc_nums) {
            min_doc_nums = ngram->num_docs;
        }
        ngram = APR_RING_NEXT(ngram, link);
    }

    next_offset = 0;
    ngram = APR_RING_FIRST(ngrams);
    while (ngram != APR_RING_SENTINEL(ngrams, ngram_t, link)) {
        next = APR_RING_NEXT(ngram, link);
        if (ngram->offset == next_offset ||
            next == APR_RING_SENTINEL(ngrams, ngram_t, link) ||
            ngram->num_docs == min_doc_nums) {
            next_offset = ngram->offset + ngram->nchars;
        }
        else {
            APR_RING_REMOVE(ngram, link);
            (*num_ngrams)--;
        }
        ngram = next;
    }
}

static rast_error_t *
is_candidate_document(apr_pool_t *pool, rast_doc_id_t base_doc_id,
                      ngram_head_t *ngrams, int num_ngrams, int *is_candidate)
{
    ngram_t *ngram;
    rast_doc_id_t doc_id;
    rast_error_t *error;

    ngram = APR_RING_NEXT(APR_RING_FIRST(ngrams), link);
    while (ngram != APR_RING_SENTINEL(ngrams, ngram_t, link)) {
        doc_id = (rast_doc_id_t) -1;
        while (!ngram->type->is_done(ngram)) {
            error = ngram->type->get_current_doc_id(ngram, &doc_id);
            if (error != RAST_OK) {
                return error;
            }
            if (base_doc_id <= doc_id) {
                break;
            }
            ngram->type->next_doc(ngram);
        }
        if (doc_id == (rast_doc_id_t) -1 || base_doc_id != doc_id) {
            *is_candidate = 0;
            return RAST_OK;
        }
        ngram = APR_RING_NEXT(ngram, link);
    }
    *is_candidate = 1;
    return RAST_OK;
}

static rast_error_t *
create_pos_cursors(apr_pool_t *pool, ngram_head_t *ngrams,
                   pos_cursor_t **pos_cursors, int *num_pos_cursors)
{
    rast_error_t *error;
    int i;
    ngram_t *ngram;
    pos_cursor_t *cursor;

    i = 0;
    for (ngram = APR_RING_FIRST(ngrams);
         ngram != APR_RING_SENTINEL(ngrams, ngram_t, link);
         ngram = APR_RING_NEXT(ngram, link)) {
        error = ngram->type->pos_cursor_create(ngram, pool, &cursor);
        if (error != RAST_OK) {
            return error;
        }
        *(pos_cursors + i) = cursor;
        i++;
    }
    *num_pos_cursors = i;
    return RAST_OK;
}

static inline int
is_valid_ngram(rast_pos_t base, pos_cursor_t *cursor)
{
    rast_pos_t n;

    while (!cursor->type->is_done(cursor)) {
        n = cursor->type->get_current(cursor);
        if (n >= cursor->offset) {
            if (n - cursor->offset == base) {
                return 1;
            }
            else if (n - cursor->offset > base) {
                return 0;
            }
        }
        cursor->type->next(cursor);
    }
    return 0;
}

static int
check_positions(pos_cursor_t **pos_cursors, rast_size_t num_pos_cursors)
{
    pos_cursor_t *cursor;
    rast_pos_t n, base;
    int i;

    cursor = *(pos_cursors + 0);
    n = cursor->type->get_current(cursor);
    if (n < cursor->offset) {
        return 0;
    }
    base = n - cursor->offset;

    for (i = 1; i < num_pos_cursors; i++) {
        cursor = *(pos_cursors + i);
        if (!is_valid_ngram(base, cursor)) {
            return 0;
        }
    }
    return 1;
}

static rast_error_t *
count_terms(apr_pool_t *pool, ngram_head_t *ngrams, pos_cursor_t **pos_cursors,
            int need_tf, int *count, rast_pos_t *first_pos)
{
    rast_size_t num_pos_cursors;
    pos_cursor_t *cursor;
    apr_pool_t *sub_pool;
    rast_error_t *error;

    apr_pool_create(&sub_pool, pool);
    error = create_pos_cursors(pool, ngrams, pos_cursors, &num_pos_cursors);
    if (error != RAST_OK) {
        apr_pool_destroy(sub_pool);
        return error;
    }
    *count = 0;
    cursor = *(pos_cursors + 0);
    while (!cursor->type->is_done(cursor)) {
        if (check_positions(pos_cursors, num_pos_cursors)) {
            if (*count == 0) {
                *first_pos = cursor->type->get_current(cursor);
            }
            (*count)++;
            if (!need_tf) {
                apr_pool_destroy(sub_pool);
                return RAST_OK;
            }
        }
        cursor->type->next(cursor);
    }
    apr_pool_destroy(sub_pool);
    return RAST_OK;
}

static rast_candidate_t *
create_candidate(apr_pool_t *pool, rast_doc_id_t doc_id,
                 int count, rast_pos_t pos, int need_tf)
{
    rast_candidate_t *result = (rast_candidate_t *)
        apr_palloc(pool, sizeof(rast_candidate_t));
    rast_term_frequency_t *tf;

    result->doc_id = doc_id;
    APR_RING_INIT(&result->terms, rast_term_frequency_t, link);
    if (need_tf) {
        tf = (rast_term_frequency_t *)
            apr_palloc(pool, sizeof(rast_term_frequency_t));
        tf->count = count;
        tf->pos = pos;
        APR_RING_INSERT_TAIL(&result->terms, tf, rast_term_frequency_t, link);
    }
    return result;
}

rast_error_t *
rast_text_index_search(rast_text_index_t *index, const char *s, int need_tf,
                       rast_query_result_t **result, apr_pool_t *pool)
{
    rast_error_t *error;
    apr_pool_t *sub_pool;
    ngram_head_t *ngrams;
    int num_ngrams;
    ngram_t *ngram;
    rast_doc_id_t doc_id;
    int is_candidate, count;
    rast_pos_t pos;
    rast_term_t *term = NULL;
    pos_cursor_t **pos_cursors;

    apr_pool_create(&sub_pool, pool);
    error = get_ngrams(sub_pool, index, s, &ngrams, &num_ngrams);
    if (error != RAST_OK) {
        apr_pool_destroy(sub_pool);
        return error;
    }
    *result = rast_query_result_create(pool);
    if (need_tf) {
        term = (rast_term_t *) apr_palloc(pool, sizeof(rast_term_t));
        term->term = s;
        term->doc_count = 0;
        APR_RING_INSERT_TAIL(&(*result)->terms, term, rast_term_t, link);
    }
    if (ngrams == NULL || num_ngrams <= 0) {
        /* should be error? */
        apr_pool_destroy(sub_pool);
        return RAST_OK;
    }
    optimize_ngrams(ngrams, &num_ngrams);
    pos_cursors = (pos_cursor_t **)
        apr_palloc(sub_pool, sizeof(pos_cursor_t *) * num_ngrams);
    ngram = APR_RING_FIRST(ngrams);
    while (!ngram->type->is_done(ngram)) {
        error = ngram->type->get_current_doc_id(ngram, &doc_id);
        if (error != RAST_OK) {
            apr_pool_destroy(sub_pool);
            return error;
        }
        error = is_candidate_document(sub_pool, doc_id, ngrams, num_ngrams,
                                      &is_candidate);
        if (error != RAST_OK) {
            apr_pool_destroy(sub_pool);
            return error;
        }
        if (is_candidate) {
            error = count_terms(pool, ngrams, pos_cursors, need_tf,
                                &count, &pos);
            if (error != RAST_OK) {
                apr_pool_destroy(sub_pool);
                return error;
            }
            if (count > 0) {
                rast_candidate_t *candidate =
                    create_candidate(pool, doc_id, count, pos, need_tf);
                APR_RING_INSERT_TAIL(&(*result)->candidates, candidate,
                                     rast_candidate_t, link);
                if (need_tf) {
                    term->doc_count++;
                }
            }
        }
        ngram->type->next_doc(ngram);
    }
    apr_pool_destroy(sub_pool);
    return RAST_OK;
}

static rast_error_t *
pack_doc(doc_t *doc, char *s, off_t *nbytes)
{
    char *p;
    int n;

    p = s;
    n = pack_number(p, doc->doc_id);
    if (n == 0) {
        return rast_error(RAST_ERROR_GENERAL, "invalid doc_id");
    }
    p += n;

    n = pack_number(p, doc->pos_bytes);
    if (n == 0) {
        return rast_error(RAST_ERROR_GENERAL, "invalid position size");
    }
    p += n;
    memcpy(p, doc->pos_ptr, doc->pos_bytes);
    p += doc->pos_bytes;
    *nbytes = p - s;
    
    return RAST_OK;
}

struct ngram_db_entry_t {
    const char *token;
    int token_nbytes;
    int num_docs;
    off_t block_nbytes;
    rast_size_t block_no;
    rast_size_t block_count;
    APR_RING_ENTRY(ngram_db_entry_t) link;
};

typedef struct ngram_db_entry_t ngram_db_entry_t;
APR_RING_HEAD(ngram_db_head_t, ngram_db_entry_t);
typedef struct ngram_db_head_t ngram_db_head_t;

static int
compare_ngram_db_entry_func(const void *v1, const void *v2)
{
    int result;
    const ngram_db_entry_t *entry1 = *(const ngram_db_entry_t **) v1;
    const ngram_db_entry_t *entry2 = *(const ngram_db_entry_t **) v2;

    result = entry2->block_nbytes - entry1->block_nbytes;
    if (result != 0) {
        return result;
    }
    return entry2->num_docs - entry1->num_docs;
}

typedef struct {
    DBC *cursor;
    DBT db_key;
    DBT db_value;
    int dberr;
    apr_pool_t *pool;
} ngram_db_cursor_t;

static rast_error_t *
ngram_db_cursor_create(ngram_db_cursor_t **result,
                       DB *ngram_db, DB_TXN *bdb_txn, apr_pool_t *pool)
{
    ngram_db_cursor_t *cursor;
    int dberr;
    apr_pool_t *sub_pool;

    apr_pool_create(&sub_pool, pool);
    cursor = (ngram_db_cursor_t *)
        apr_palloc(sub_pool, sizeof(ngram_db_cursor_t));

    dberr = ngram_db->cursor(ngram_db, bdb_txn, &cursor->cursor, 0);
    if (dberr != 0) {
        apr_pool_destroy(sub_pool);
        return db_error_to_rast_error(dberr);
    }

    memset(&cursor->db_key, 0, sizeof(DBT));
    memset(&cursor->db_value, 0, sizeof(DBT));
    dberr = cursor->cursor->c_get(cursor->cursor,
                                  &cursor->db_key, &cursor->db_value,
                                  DB_FIRST);
    if (dberr != 0 && dberr != DB_NOTFOUND) {
        cursor->cursor->c_close(cursor->cursor);
        apr_pool_destroy(sub_pool);
        return db_error_to_rast_error(dberr);
    }
    cursor->dberr = dberr;
    cursor->pool = sub_pool;

    *result = cursor;
    return RAST_OK;
}

static inline int
ngram_db_cursor_is_done(ngram_db_cursor_t *cursor)
{
    return cursor->dberr != 0;
}

static inline void
ngram_db_cursor_get_next(ngram_db_cursor_t *cursor)
{
    cursor->dberr = cursor->cursor->c_get(cursor->cursor,
                                          &cursor->db_key, &cursor->db_value,
                                          DB_NEXT);
}

static inline rast_error_t *
ngram_db_cursor_close(ngram_db_cursor_t *cursor)
{
    int dberr;
    apr_pool_t *pool;

    dberr = cursor->dberr;
    pool = cursor->pool;
    cursor->cursor->c_close(cursor->cursor);
    apr_pool_destroy(pool);

    if (dberr == DB_NOTFOUND) {
        return RAST_OK;
    }
    return db_error_to_rast_error(dberr);
}

static rast_error_t *
create_new_pos_data(rast_text_index_t *old_index, apr_hash_t *id_map_table,
                    DB_TXN *bdb_txn,
                    ngram_db_head_t **ngram_db_ring, int *num_entries,
                    FILE *tmp_file, apr_pool_t *pool)
{
    off_t offset;
    ngram_db_cursor_t *cursor;
    rast_error_t *error;
    apr_pool_t *sub_pool, *doc_pool;

    error = ngram_db_cursor_create(&cursor, old_index->ngram_db, bdb_txn,
                                   pool);
    if (error != RAST_OK) {
        return error;
    }

    apr_pool_create(&sub_pool, cursor->pool);
    apr_pool_create(&doc_pool, cursor->pool);

    offset = 0;
    *num_entries = 0;
    *ngram_db_ring = (ngram_db_head_t *)
        apr_palloc(pool, sizeof(ngram_db_head_t));
    APR_RING_INIT(*ngram_db_ring, ngram_db_entry_t, link);
    while(!ngram_db_cursor_is_done(cursor)) {
        rast_size_t doc_nbytes;
        rast_doc_id_t num_docs;
        const char *doc_ptr, *doc_ptr_end;
        doc_t doc;
        ngram_db_entry_t *entry;
        char *s;
        off_t nbytes;

        error = get_packed_positions_from_pos_file(doc_pool, old_index,
                                                   (rast_size_t *)
                                                   cursor->db_value.data,
                                                   &doc_ptr, &doc_nbytes,
                                                   &num_docs);
        if (error != RAST_OK) {
            ngram_db_cursor_close(cursor);
            return error;
        }

        doc_ptr_end = doc_ptr + doc_nbytes;
        entry = (ngram_db_entry_t *)
            apr_palloc(pool, sizeof(ngram_db_entry_t));
        entry->token = apr_pmemdup(pool,
                                   cursor->db_key.data, cursor->db_key.size);
        entry->token_nbytes = cursor->db_key.size;
        entry->num_docs = 0;
        entry->block_nbytes = 0;
        while (doc_ptr < doc_ptr_end) {
            rast_doc_id_t *new_doc_id;

            error = parse_doc(doc_ptr, doc_ptr_end, &doc);
            if (error != RAST_OK) {
                ngram_db_cursor_close(cursor);
                return error;
            }

            doc_ptr += doc.bytes;
            new_doc_id = (rast_doc_id_t *) apr_hash_get(id_map_table,
                                                        &doc.doc_id,
                                                        sizeof(rast_doc_id_t));
            if (new_doc_id == NULL) {
                continue;
            }

            doc.doc_id = *new_doc_id;
            s = (char *) apr_palloc(sub_pool, doc.bytes);
            error = pack_doc(&doc, s, &nbytes);
            if (error != RAST_OK) {
                ngram_db_cursor_close(cursor);
                return error;
            }
            if (fwrite(s, 1, nbytes, tmp_file) < nbytes) {
                ngram_db_cursor_close(cursor);
                return os_error_to_rast_error(errno);
            }

            entry->num_docs++;
            entry->block_nbytes += nbytes;
            apr_pool_clear(sub_pool);
        }

        apr_pool_clear(doc_pool);
        if (entry->num_docs > 0) {
            off_t new_offset;

            entry->block_no = offset / old_index->pos_block_size;
            entry->block_count =
                entry->block_nbytes / old_index->pos_block_size + 1;
            new_offset = offset + entry->block_count *
                old_index->pos_block_size;
            fseeko(tmp_file, new_offset, SEEK_SET);

            APR_RING_INSERT_TAIL(*ngram_db_ring, entry, ngram_db_entry_t,
                                 link);
            (*num_entries)++;

            offset = new_offset;
        }

        ngram_db_cursor_get_next(cursor);
    }
    ftruncate(fileno(tmp_file), offset);
    return ngram_db_cursor_close(cursor);
}

static rast_error_t *
create_optimized_pos_file(ngram_db_entry_t **ngram_db_ary, int num_entries,
                          rast_size_t pos_block_size,
                          FILE *tmp_file, FILE *new_pos_file,
                          apr_pool_t *pool)
{
    rast_size_t block_no = 0;
    off_t offset, nbytes;
    char *s;
    apr_pool_t *sub_pool;
    int i;

    apr_pool_create(&sub_pool, pool);
    for (i = 0; i < num_entries; i++) {
        offset = ngram_db_ary[i]->block_no * pos_block_size;
        if (fseeko(tmp_file, offset, SEEK_SET) == -1) {
            apr_pool_destroy(sub_pool);
            return os_error_to_rast_error(errno);
        }
        nbytes = ngram_db_ary[i]->block_count * pos_block_size;
        s = (char *) apr_palloc(sub_pool, nbytes);
        if (fread(s, 1, nbytes, tmp_file) < nbytes) {
            apr_pool_destroy(sub_pool);
            return os_error_to_rast_error(errno);
        }

        if (fwrite(s, 1, nbytes, new_pos_file) < nbytes) {
            apr_pool_destroy(sub_pool);
            return os_error_to_rast_error(errno);
        }
        ngram_db_ary[i]->block_no = block_no;
        block_no += ngram_db_ary[i]->block_count;
        apr_pool_clear(sub_pool);
    }
    apr_pool_destroy(sub_pool);
    return RAST_OK;
}

static rast_error_t *
create_optimized_ngram_db(ngram_db_entry_t **ngram_db_ary, int num_entries,
                          DB *old_ngram_db, DB *new_ngram_db,
                          DB_TXN *bdb_txn, int is_native, apr_pool_t *pool)
{
    ngram_db_cursor_t *cursor;
    apr_hash_t *ngram_db_hash;
    apr_pool_t *sub_pool;
    rast_error_t *error;
    int i;

    error = ngram_db_cursor_create(&cursor, old_ngram_db, bdb_txn, pool);
    if (error != RAST_OK) {
        return error;
    }

    apr_pool_create(&sub_pool, pool);
    ngram_db_hash = apr_hash_make(sub_pool);
    for (i = 0; i < num_entries; i++) {
        apr_hash_set(ngram_db_hash,
                     ngram_db_ary[i]->token, ngram_db_ary[i]->token_nbytes,
                     ngram_db_ary[i]);
    }

    while (!ngram_db_cursor_is_done(cursor)) {
        ngram_db_entry_t *entry;

        entry = (ngram_db_entry_t *)
            apr_hash_get(ngram_db_hash,
                         cursor->db_key.data, cursor->db_key.size);
        if (entry != NULL) {
            DBT db_value = { 0 };
            rast_size_t ngram_db_value[4];
            int dberr;

            ngram_db_value[0] = rast_fix_byte_order(entry->block_no,
                                                    is_native);
            ngram_db_value[1] = rast_fix_byte_order(entry->block_count,
                                                    is_native);
            ngram_db_value[2] = rast_fix_byte_order(entry->block_nbytes,
                                                    is_native);
            ngram_db_value[3] = rast_fix_byte_order(entry->num_docs,
                                                    is_native);

            db_value.data = ngram_db_value;
            db_value.size = sizeof(rast_size_t) * 4;
            dberr = new_ngram_db->put(new_ngram_db, bdb_txn,
                                      &cursor->db_key, &db_value, 0);
            if (dberr != 0) {
                apr_pool_destroy(sub_pool);
                ngram_db_cursor_close(cursor);
                return db_error_to_rast_error(dberr);
            }
        }
        ngram_db_cursor_get_next(cursor);
    }

    apr_pool_destroy(sub_pool);

    return ngram_db_cursor_close(cursor);
}

static rast_error_t *
create_optimized_rare_ngram_db(apr_hash_t *id_map_table,
                               DB *old_rare_ngram_db,
                               DB *new_rare_ngram_db, DB_TXN *bdb_txn,
                               int is_native, apr_pool_t *pool)
{
    ngram_db_cursor_t *cursor;
    int dberr;
    apr_pool_t *sub_pool;
    rast_error_t *error;

    error = ngram_db_cursor_create(&cursor, old_rare_ngram_db, bdb_txn, pool);
    if (error != RAST_OK) {
        return error;
    }

    apr_pool_create(&sub_pool, cursor->pool);
    while(!ngram_db_cursor_is_done(cursor)) {
        char *p, *p_end;
        char *positions;
        int n;
        rast_doc_id_t old_doc_id, *new_doc_id;
        rast_size_t pos_bytes;

        p = (char *) cursor->db_value.data;
        p_end = p + cursor->db_value.size;

        n = unpack_number(p, p_end - p, &old_doc_id);
        p += n;
        positions = p;
        n = unpack_number(p, p_end - p, &pos_bytes);
        pos_bytes += n;

        new_doc_id = (rast_doc_id_t *) apr_hash_get(id_map_table, &old_doc_id,
                                                    sizeof(rast_doc_id_t));
        if (new_doc_id != NULL) {
            char *s;
            DBT db_value = { 0 };

            s = apr_palloc(sub_pool, cursor->db_value.size);
            p = s;
            n = pack_number(p, *new_doc_id);
            p += n;
            memcpy(p, positions, pos_bytes);

            db_value.data = s;
            db_value.size = n + pos_bytes;
            dberr = new_rare_ngram_db->put(new_rare_ngram_db, bdb_txn,
                                           &cursor->db_key, &db_value, 0);
            if (dberr != 0) {
                ngram_db_cursor_close(cursor);
                return db_error_to_rast_error(dberr);
            }
            apr_pool_clear(sub_pool);
        }

        ngram_db_cursor_get_next(cursor);
    }
    return ngram_db_cursor_close(cursor);
}

static rast_error_t *
create_optimized_text_index(rast_text_index_t *old_index,
                            rast_text_index_t *new_index,
                            const char *new_index_name,
                            apr_hash_t *id_map_table, DB_TXN *bdb_txn,
                            apr_pool_t *pool)
{
    char *tmp_filename;
    FILE *tmp_file;
    ngram_db_entry_t *entry, **ngram_db_ary;
    ngram_db_head_t *ngram_db_ring;
    int num_entries, i;
    apr_pool_t *sub_pool;
    rast_error_t *error;

    tmp_filename = apr_pstrcat(pool, new_index_name, "-1.tmp", NULL);
    tmp_file = fopen(tmp_filename, "w+");
    if (tmp_file == NULL) {
        return os_error_to_rast_error(errno);
    }

    error = create_new_pos_data(old_index, id_map_table, bdb_txn,
                                &ngram_db_ring, &num_entries, tmp_file, pool);
    if (error != RAST_OK) {
        fclose(tmp_file);
        (void) apr_file_remove(tmp_filename, pool);
        return error;
    }

    i = 0;
    ngram_db_ary = (ngram_db_entry_t **)
        apr_palloc(pool, sizeof(ngram_db_entry_t *) * num_entries);
    for (entry = APR_RING_FIRST(ngram_db_ring);
         entry != APR_RING_SENTINEL(ngram_db_ring, ngram_db_entry_t, link);
         entry = APR_RING_NEXT(entry, link)) {
        ngram_db_ary[i] = entry;
        i++;
    }
    qsort(ngram_db_ary, num_entries, sizeof(ngram_db_entry_t *),
          compare_ngram_db_entry_func);

    apr_pool_create(&sub_pool, pool);

    error = create_optimized_pos_file(ngram_db_ary, num_entries,
                                      old_index->pos_block_size,
                                      tmp_file, new_index->pos_file, sub_pool);
    fclose(tmp_file);
    (void) apr_file_remove(tmp_filename, pool);
    if (error != RAST_OK) {
        apr_pool_destroy(sub_pool);
        return error;
    }

    apr_pool_clear(sub_pool);
    error = create_optimized_ngram_db(ngram_db_ary, num_entries,
                                     old_index->ngram_db, new_index->ngram_db,
                                     bdb_txn, old_index->is_native, sub_pool);
    if (error != RAST_OK) {
        apr_pool_destroy(sub_pool);
        return error;
    }

    apr_pool_clear(sub_pool);
    error = create_optimized_rare_ngram_db(id_map_table,
                                           old_index->rare_ngram_db,
                                           new_index->rare_ngram_db,
                                           bdb_txn, old_index->is_native,
                                           sub_pool);
    apr_pool_destroy(sub_pool);
    return error;
}

rast_error_t *
rast_text_index_optimize(DB_ENV *bdb_env, DB_TXN *bdb_txn, int lorder,
                         rast_text_index_t *old_index,
                         const char *new_index_name,
                         apr_hash_t *id_map_table)
{
    apr_pool_t *pool;
    rast_text_index_t *new_index;
    rast_error_t *error;

    apr_pool_create(&pool, old_index->pool);
    error = rast_text_index_open(&new_index, new_index_name, RAST_DB_RDWR,
                                 old_index->encoding_module,
                                 bdb_env, lorder,
                                 old_index->pos_block_size, pool);
    if (error != RAST_OK) {
        apr_pool_destroy(pool);
        return error;
    }

    error = create_optimized_text_index(old_index, new_index, new_index_name,
                                        id_map_table, bdb_txn, pool);

    rast_text_index_close(new_index);
    apr_pool_destroy(pool);
    return error;
}

/* vim: set filetype=c sw=4 expandtab : */
