# prime/engine/engine-sary.rb
# $Id: engine-sary.rb,v 1.4 2005/03/07 07:51:32 komatsu Exp $
#
# Copyright (C) 2002 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.
#

require 'prime/taiyaki'
require 'sary'
require 'prime/engine/engine'
require 'prime/prime-dict-config.rb'

# Name of the engine class defined in this file.
# NOTE(review): presumably read by whatever loads this file to decide which
# class to instantiate -- confirm against the loader.
$engine_class_name = 'PrimeEngineSary'

# Dictionary engine that looks words up through Sary::Searcher indexes.
#
# Dictionary lines are tab-separated: pron, pos, literal, score, followed by
# zero or more "key=value" items (see parse_word).
class PrimeEngineSary < PrimeEngine
  # Maximum number of lines returned by the prefix-style lookups.
  PREFIX_LOOKUP_LIMIT = 10

  # dict_file:: path of the dictionary file handed to Sary::Searcher.
  def initialize(dict_file)
    super()

    @name = "Basic sary meta-engine"
    @id   = "sary"
    @description = "Sary engine"

    @max_candidates = 15
    @dict_file    = dict_file
    initialize_dict()
  end

  # Opens the searchers used by this engine.
  #
  # @dict and @dict_literal share the same dictionary file; the latter uses
  # the "<dict_file>_literal.ary" index instead of the default one.
  def initialize_dict
    @dict         = Sary::Searcher.new(@dict_file)
    @dict_literal = Sary::Searcher.new(@dict_file, @dict_file + "_literal.ary")
    # This class never assigns @partdict_file (it may come from the superclass
    # or a subclass), so only open the part-of-speech index when a path is
    # actually available.  get_pos_data already copes with a nil @partdict.
    @partdict     = @partdict_file ? Sary::Searcher.new(@partdict_file) : nil
    # Searcher caching is intentionally left disabled:
    #   @dict.enable_cache()
    #   @dict_literal.enable_cache()
    #   @partdict.enable_cache()
  end

  # Looks up the words matching +query+ and returns them as a PrimeWordList.
  #
  # query.method selects the lookup:
  #   :prefix          inputs are used as-is against @dict (limited)
  #   :exact           inputs are terminated with a tab (and the POS, if any)
  #   :literal_prefix  like :prefix, but against @dict_literal
  #   :literal_exact   inputs are terminated with a tab, against @dict_literal
  # Any other method, an input of [""], or a missing dictionary yields an
  # empty list.
  def search(query)
    return PrimeWordList.new() if query.input == [""] || @dict.nil?

    case query.method
    when :prefix
      word_lines = lookup_dict(query.input, PREFIX_LOOKUP_LIMIT)
    when :exact
      # Build the keys by interpolation rather than format(): a "%" inside
      # query.pos must not be interpreted as a format directive.
      suffix = query.pos ? "\t#{query.pos}\t" : "\t"
      keys   = query.input.map { |input| "#{input}#{suffix}" }
      word_lines = lookup_dict(keys)
    when :literal_prefix
      word_lines = lookup_dict(query.input, PREFIX_LOOKUP_LIMIT, @dict_literal)
    when :literal_exact
      keys = query.input.map { |input| "#{input}\t" }
      word_lines = lookup_dict(keys, nil, @dict_literal)
    else
      return PrimeWordList.new()
    end

    word_list = PrimeWordList.new()
    word_lines.each { |word_line| word_list << parse_word(word_line) }
    word_list
  end

  # Returns true when "pron<TAB>pos<TAB>literal" is found in @dict, false
  # otherwise (including when no dictionary is loaded).
  #
  # NOTE(review): the key has no trailing tab, so a literal that is a prefix
  # of a longer literal presumably matches as well -- confirm this is intended.
  def check_existence(pron, literal, pos)
    return false if @dict.nil?

    query = [pron, pos, literal].join("\t")
    @dict.search(query) ? true : false
  end

  # Collects part-of-speech lists from @partdict.
  #
  # For every value yielded by string.increase that is found in @partdict
  # (as "<pron><TAB>"), the remaining tab-separated fields of the matching
  # line are stored under that pron.  Returns a Hash of pron => [pos, ...];
  # empty when no @partdict is available.
  def get_pos_data(string)
    pos_data = {}
    return pos_data if @partdict.nil?

    string.increase { |pron|
      if @partdict.search(pron + "\t")
        (_pron, *pos_list) = @partdict.get_next_context_line.chomp.split(/\t/)
        pos_data[pron] = pos_list
      end
    }
    pos_data
  end

  private

  # Converts one dictionary line into a PrimeWord.
  #
  # The first four tab-separated fields are pron, pos, literal and score;
  # every following field is a "key=value" item collected into a Hash.
  def parse_word(line)
    (pron, pos, literal, score, *data_list) = line.split(/\t/)
    data = {}
    data_list.each { |item|
      # Split on the first "=" only, so values that contain "=" themselves
      # are kept whole instead of being truncated.
      (key, value) = item.split("=", 2)
      # An item with nothing after "=" keeps mapping to nil, as before.
      value = nil if value && value.empty?
      data[key] = value
    }
    # A missing score becomes 0 via to_i; every score is offset by 10000.
    PrimeWord.new(pron, literal, pos, score.to_i + 10000, data)
  end

  # Returns the chomped dictionary lines matching any of +query_lines+.
  #
  # query_lines:: Array of search keys.
  # max::         maximum number of lines to return; nil means no limit and
  #               a value <= 0 yields an empty result.
  # dict::        searcher to query (defaults to @dict).
  def lookup_dict(query_lines, max = nil, dict = @dict)
    results = []
    return results if max && max <= 0
    return results if query_lines.empty?
    return results unless dict.multi_search(query_lines)

    dict.sort_occurrences
    while (line = dict.get_next_context_line)
      results << line.chomp
      # Stop as soon as the limit is reached; the previous "length > max"
      # test before appending let max + 1 lines through.
      break if max && results.length >= max
    end
    results
  end
end

