ParseOptions

class
Superclass
rb_cObject
Extended With
Loggability
LinkParser::DeprecationUtilities

LinkParser parse options class. Instances of this class are used to specify the different parameters that are used to parse sentences. Examples of the kinds of things that are controlled by ParseOptions include maximum parsing time and memory, whether to use null-links, and whether or not to use “panic” mode. This data structure is passed in to the various parsing and printing routines along with the sentence.

Public Class Methods

anchor
LinkParser::ParseOptions.new( opthash ) → obj

Create a new ParseOptions object and set values from opthash.

po = LinkParser::ParseOptions.new( min_null_count: 1, verbosity: 0 )
/*
 * Initializer for LinkParser::ParseOptions. Accepts zero or one argument (an
 * options hash); when one is given and is truthy, it is applied by calling
 * #merge! on the new object.
 *
 * Raises a RuntimeError if the object already has a Parse_Options struct
 * attached, and lets rb_scan_args raise if too many arguments are given.
 */
static VALUE
rlink_parseopts_init( int argc, VALUE *argv, VALUE self )
{
        VALUE opthash = Qnil;

        if ( check_parseopts(self) ) {
                rb_raise( rb_eRuntimeError, "Cannot re-initialize a ParseOptions object." );
        }

        /* Check the argument list before allocating, so a bad call doesn't leave the
           object with a struct attached (which would block a later initialize). */
        rb_scan_args( argc, argv, "01", &opthash );

        /* %p expects a pointer argument, so cast the VALUE explicitly. */
        rlink_log_obj( self, "debug", "Initializing a ParseOptions: %p", (void *)self );
        DATA_PTR( self ) = parse_options_create();

        if ( RTEST(opthash) ) {
                rlink_log_obj( self, "debug", "Setting options from an opthash." );
                rb_funcall( self, rb_intern("merge!"), 1, opthash );
        }

        return self;
}
anchor
option_names()

Return an Array of valid option names as Symbols.

# File lib/linkparser/parseoptions.rb, line 20
# Return the names of the receiver's own setter methods (lowercase-initial
# methods ending in '='), with the trailing '=' removed, as Symbols. The list
# is computed on first use and cached in @option_names.
def self::option_names
        @option_names ||= begin
                setters = instance_methods( false ).grep( /^[a-z].*=$/ )
                setters.map {|setter| setter.to_s.chomp('=').to_sym }
        end
end

Public Instance Methods

anchor
all_short_connectors= boolean

If true, then all connectors have length restrictions imposed on them – they can be no farther than #short_length apart. This is used when parsing in “panic” mode, for example.

/*
 * Setter for the all_short_connectors option. The argument is reduced to its
 * Ruby truthiness before being handed to the library; the original argument
 * is returned.
 */
static VALUE
rlink_parseopts_set_all_short_connectors( VALUE self, VALUE val )
{
        Parse_Options options = get_parseopts( self );
        int enabled = RTEST( val ) ? 1 : 0;

        parse_options_set_all_short_connectors( options, enabled );

        return val;
}
anchor
all_short_connectors? → true or false

Get the value of the all_short_connectors option.

/*
 * Predicate for the all_short_connectors option: Qtrue when the library
 * reports a non-zero value, Qfalse otherwise.
 */
static VALUE
rlink_parseopts_get_all_short_connectors_p( VALUE self )
{
        Parse_Options options = get_parseopts( self );

        if ( parse_options_get_all_short_connectors(options) ) {
                return Qtrue;
        }

        return Qfalse;
}
anchor
cost_model_type → Symbol

Get the cost model type for ranking linkages.

/*
 * Return the current cost model as a Symbol: vdal_sym for VDAL, corpus_sym
 * for CORPUS.
 *
 * Raises a RuntimeError if the library reports a model this binding does not
 * know about. (rb_bug() is meant for interpreter bugs and aborts the whole
 * process, which is too drastic for an unrecognised enum value.)
 */
static VALUE
rlink_parseopts_get_cost_model_type( VALUE self )
{
        Parse_Options opts = get_parseopts( self );
        Cost_Model_type model = parse_options_get_cost_model_type( opts );

        switch( model ) {
        case VDAL:
                return vdal_sym;
        case CORPUS:
                return corpus_sym;
        default:
                /* Cast so the enum matches the %d conversion. */
                rb_raise( rb_eRuntimeError, "Unhandled cost model type %d", (int)model );
        }

        return Qnil; /* not reached */
}
anchor
cost_model_type = :vdal
cost_model_type = :corpus

The cost model type for ranking linkages. Currently, there are two models: VDAL (:vdal) and CORPUS (:corpus). The VDAL model ranks parses from lowest to highest cost in and-cost, disjunct-cost, unused-word-cost and structure-violations-cost. The CORPUS model ranks parses according to the frequency of use of disjuncts, based on a statistical analysis of a collection of texts. If you haven't compiled the link-grammar library with support for the CORPUS cost model, attempting to set it to this will raise an exception.

/*
 * Set the cost model from a Symbol (vdal_sym or corpus_sym). Resource flags
 * are reset before the model is changed, and the setting is read back
 * afterwards to confirm the library accepted it.
 *
 * Raises an ArgumentError for any other value, or if the library did not
 * accept the requested model. Returns the Symbol it was given.
 */
static VALUE
rlink_parseopts_set_cost_model_type( VALUE self, VALUE model_name )
{
        Parse_Options opts = get_parseopts( self );
        Cost_Model_type model;

        if ( model_name == vdal_sym ) {
                rlink_log_obj( self, "debug", "Selected the 'VDAL' cost model" );
                model = VDAL;
        } else if ( model_name == corpus_sym ) {
                rlink_log_obj( self, "debug", "Selected the 'CORPUS' cost model" );
                model = CORPUS;
        } else {
                /* Pass the inspect String itself via PRIsVALUE instead of a raw char
                   pointer into a temporary: the object stays referenced while the
                   message is built, and no NUL terminator is assumed. */
                rb_raise( rb_eArgError,
                         "Unknown cost model %"PRIsVALUE" (expected either :vdal or :corpus).",
                         rb_inspect(model_name) );
        }

        rlink_log_obj( self, "info", "Setting the cost model to %s", model == VDAL ? "VDAL" : "CORPUS" );
        parse_options_reset_resources( opts );
        parse_options_set_cost_model_type( opts, model );

        /* Read the setting back: if it didn't stick, report it as an error. */
        if ( parse_options_get_cost_model_type(opts) != model ) {
                rb_raise( rb_eArgError,
                        "Couldn't set the cost model: is link-grammar possibly compiled without it?" );
        }

        return model_name;
}
anchor
disjunct_cost → fixnum

Get the maximum disjunct cost used during parsing.

/*
 * Return the disjunct_cost option as a Ruby Integer.
 */
static VALUE
rlink_parseopts_get_disjunct_cost( VALUE self )
{
        Parse_Options opts = get_parseopts( self );
        int cost = parse_options_get_disjunct_cost( opts );

        /* INT2NUM range-checks; INT2FIX would silently corrupt values that don't
           fit in a Fixnum (possible where int is as wide as VALUE). */
        return INT2NUM( cost );
}
anchor
disjunct_cost= fixnum

Determines the maximum disjunct cost used during parsing, where the cost of a disjunct is equal to the maximum cost of all of its connectors. The default is that all disjuncts, no matter what their cost, are considered.

/*
 * Setter for the disjunct_cost option. The argument is converted with
 * NUM2INT and passed to the library; the original argument is returned.
 */
static VALUE
rlink_parseopts_set_disjunct_cost( VALUE self, VALUE disjunct_cost )
{
        Parse_Options options = get_parseopts( self );
        int cost = NUM2INT( disjunct_cost );

        parse_options_set_disjunct_cost( options, cost );

        return disjunct_cost;
}
anchor
initialize_copy(p1)

Copy constructor

/*
 * Copy constructor (initialize_copy). Attaches a freshly created
 * Parse_Options struct to the copy, applies the settings of +other+ by
 * calling #merge! with it, then calls the superclass implementation.
 *
 * Raises a RuntimeError if the receiver already has a struct attached.
 */
static VALUE
rlink_parseopts_init_copy( VALUE self, VALUE other )
{
        if ( check_parseopts(self) ) {
                rb_raise( rb_eRuntimeError, "Can't recopy a ParseOptions object." );
        }

        /* %p expects a pointer argument, so cast the VALUE explicitly. */
        rlink_log_obj( self, "debug", "Initializing a copied ParseOptions: %p", (void *)self );
        DATA_PTR( self ) = parse_options_create();
        rb_funcall( self, rb_intern("merge!"), 1, other );

        rb_call_super( 1, &other );

        return self;
}
anchor
islands_ok= boolean

This option determines whether or not “islands” of links are allowed. For example, the following linkage has an island:

  +------Wd-----+
  |     +--Dsu--+---Ss--+-Paf-+      +--Dsu--+---Ss--+--Pa-+
  |     |       |       |     |      |       |       |     |
///// this sentence.n is.v false.a this sentence.n is.v true.a
/*
 * Setter for the islands_ok option. The argument is reduced to its Ruby
 * truthiness before being handed to the library; the original argument is
 * returned.
 */
static VALUE
rlink_parseopts_set_islands_ok( VALUE self, VALUE islands_ok )
{
        Parse_Options options = get_parseopts( self );
        int allowed = RTEST( islands_ok ) ? 1 : 0;

        parse_options_set_islands_ok( options, allowed );

        return islands_ok;
}
anchor
islands_ok? → true or false

Get the value of the islands_ok option.

/*
 * Predicate for the islands_ok option: Qtrue when the library reports a
 * non-zero value, Qfalse otherwise.
 */
static VALUE
rlink_parseopts_get_islands_ok_p( VALUE self )
{
        Parse_Options options = get_parseopts( self );

        if ( parse_options_get_islands_ok(options) ) {
                return Qtrue;
        }

        return Qfalse;
}
anchor
linkage_limit → fixnum

This parameter determines the maximum number of linkages that are considered in post-processing. If more than linkage_limit linkages are found, then a random sample of linkage_limit is chosen for post-processing. When this happens, a warning is displayed at verbosity levels greater than 1.

/*
 * Return the linkage_limit option as a Ruby Integer.
 */
static VALUE
rlink_parseopts_get_linkage_limit( VALUE self )
{
        Parse_Options opts = get_parseopts( self );
        int limit = parse_options_get_linkage_limit( opts );

        /* INT2NUM range-checks; INT2FIX would silently corrupt values that don't
           fit in a Fixnum (possible where int is as wide as VALUE). */
        return INT2NUM( limit );
}
anchor
linkage_limit= fixnum

This parameter determines the maximum number of linkages that are considered in post-processing. If more than linkage_limit linkages are found, then a random sample of linkage_limit is chosen for post-processing. When this happens, a warning is displayed at verbosity levels greater than 1.

/*
 * Setter for the linkage_limit option. The argument is converted with
 * NUM2INT and passed to the library; the original argument is returned.
 */
static VALUE
rlink_parseopts_set_linkage_limit( VALUE self, VALUE linkage_limit )
{
        Parse_Options options = get_parseopts( self );
        int limit = NUM2INT( linkage_limit );

        parse_options_set_linkage_limit( options, limit );

        return linkage_limit;
}
anchor
max_memory → fixnum

Get the value of the #max_memory option.

/*
 * Return the max_memory option as a Ruby Integer.
 */
static VALUE
rlink_parseopts_get_max_memory( VALUE self )
{
        Parse_Options opts = get_parseopts( self );
        int max_mem = parse_options_get_max_memory( opts );

        /* INT2NUM range-checks; INT2FIX would silently corrupt values that don't
           fit in a Fixnum (possible where int is as wide as VALUE). */
        return INT2NUM( max_mem );
}
anchor
max_memory= fixnum

Determines the maximum memory allowed during parsing. This is used just as #max_parse_time is, so that the parsing process is terminated as quickly as possible after the total memory (including that allocated to all dictionaries, etc.) exceeds the maximum allowed.

/*
 * Setter for the max_memory option. The argument is converted with NUM2INT
 * and passed to the library; the original argument is returned.
 */
static VALUE
rlink_parseopts_set_max_memory( VALUE self, VALUE mem )
{
        Parse_Options options = get_parseopts( self );
        int max_mem = NUM2INT( mem );

        parse_options_set_max_memory( options, max_mem );

        return mem;
}
anchor
max_null_count → fixnum

Get the maximum number of null links allowed in a parse.

/*
 * Return the max_null_count option as a Ruby Integer.
 */
static VALUE
rlink_parseopts_get_max_null_count( VALUE self )
{
        Parse_Options opts = get_parseopts( self );
        int count = parse_options_get_max_null_count( opts );

        /* INT2NUM range-checks; INT2FIX would silently corrupt values that don't
           fit in a Fixnum (possible where int is as wide as VALUE). */
        return INT2NUM( count );
}
anchor
max_null_count= fixnum

Set the maximum number of null links allowed in a parse.

/*
 * Setter for the max_null_count option. The argument is converted with
 * NUM2INT and passed to the library; the original argument is returned.
 */
static VALUE
rlink_parseopts_set_max_null_count( VALUE self, VALUE null_count )
{
        Parse_Options options = get_parseopts( self );
        int count = NUM2INT( null_count );

        parse_options_set_max_null_count( options, count );

        return null_count;
}
anchor
max_parse_time → fixnum

Get the number of seconds of the #max_parse_time option.

/*
 * Return the max_parse_time option as a Ruby Integer.
 */
static VALUE
rlink_parseopts_get_max_parse_time( VALUE self )
{
        Parse_Options opts = get_parseopts( self );
        int secs = parse_options_get_max_parse_time( opts );

        /* INT2NUM range-checks; INT2FIX would silently corrupt values that don't
           fit in a Fixnum (possible where int is as wide as VALUE). */
        return INT2NUM( secs );
}
anchor
max_parse_time= seconds

Determines the approximate maximum time that parsing is allowed to take. The way it works is that after this time has expired, the parsing process is artificially forced to complete quickly by pretending that no further solutions (entries in the hash table) can be constructed. The actual parsing time might be slightly longer.

/*
 * Setter for the max_parse_time option. The argument is converted with
 * NUM2INT and passed to the library; the original argument is returned.
 */
static VALUE
rlink_parseopts_set_max_parse_time( VALUE self, VALUE secs )
{
        Parse_Options options = get_parseopts( self );
        int seconds = NUM2INT( secs );

        parse_options_set_max_parse_time( options, seconds );

        return secs;
}
anchor
memory_exhausted? → true or false

Returns true if memory constraints were exceeded during parsing.

sentence.parse
if sentence.options.memory_exhausted?
  $stderr.puts "Parsing sentence #{sentence} ran out of memory."
end
/*
 * Predicate wrapping parse_options_memory_exhausted(): Qtrue when the
 * library reports a non-zero value, Qfalse otherwise.
 */
static VALUE
rlink_parseopts_memory_exhausted_p( VALUE self )
{
        Parse_Options options = get_parseopts( self );

        if ( parse_options_memory_exhausted(options) ) {
                return Qtrue;
        }

        return Qfalse;
}
anchor
merge( other )

Return a new LinkParser::ParseOptions with the values of the receiver merged with those from the other object.

# File lib/linkparser/parseoptions.rb, line 28
# Non-destructive merge: duplicate the receiver, apply +other+ to the
# duplicate via #merge!, and return the duplicate. The receiver is left
# untouched.
def merge( other )
        self.dup.tap {|copy| copy.merge!( other ) }
end
anchor
merge!( other )

Overwrite the option settings on the receiver with those from the other object.

# File lib/linkparser/parseoptions.rb, line 37
# Overwrite the receiver's settings with those from +other+, which must
# respond to #to_hash. Each key/value pair is applied by calling the
# receiver's public "<key>=" setter.
#
# public_send is used rather than send so that a key in +other+ can only
# reach public setters; a key with no public setter raises NoMethodError.
def merge!( other )
        other.to_hash.each do |key, val|
                self.public_send( "#{key}=", val )
        end
end
anchor
min_null_count → fixnum

Get the minimum number of null links that a parse can have.

/*
 * Return the min_null_count option as a Ruby Integer.
 */
static VALUE
rlink_parseopts_get_min_null_count( VALUE self )
{
        Parse_Options opts = get_parseopts( self );
        int count = parse_options_get_min_null_count( opts );

        /* INT2NUM range-checks; INT2FIX would silently corrupt values that don't
           fit in a Fixnum (possible where int is as wide as VALUE). */
        return INT2NUM( count );
}
anchor
min_null_count= fixnum → fixnum

Set the minimum number of null links that a parse can have. A call to LinkParser::Sentence#parse will find all linkages having the minimum number of null links within the range specified by this parameter.

/*
 * Setter for the min_null_count option. The argument is converted with
 * NUM2INT and passed to the library; the original argument is returned.
 */
static VALUE
rlink_parseopts_set_min_null_count( VALUE self, VALUE null_count )
{
        Parse_Options options = get_parseopts( self );
        int count = NUM2INT( null_count );

        parse_options_set_min_null_count( options, count );

        return null_count;
}
anchor
reset_resources

Reset the timer- and memory-constraint flags.

/*
 * Call parse_options_reset_resources() on the receiver's options struct.
 * Always returns nil.
 */
static VALUE
rlink_parseopts_reset_resources( VALUE self )
{
        parse_options_reset_resources( get_parseopts(self) );

        return Qnil;
}
anchor
resources_exhausted? → true or false

Returns true if the memory or timer constraints were exceeded during parsing.

sentence.parse
if sentence.options.resources_exhausted?
  $stderr.puts "Parsing sentence #{sentence} ran out of resources."
end
/*
 * Predicate wrapping parse_options_resources_exhausted(): Qtrue when the
 * library reports a non-zero value, Qfalse otherwise.
 */
static VALUE
rlink_parseopts_resources_exhausted_p( VALUE self )
{
        Parse_Options options = get_parseopts( self );

        if ( parse_options_resources_exhausted(options) ) {
                return Qtrue;
        }

        return Qfalse;
}
anchor
short_length → fixnum

Get the value of the #short_length option.

/*
 * Return the short_length option as a Ruby Integer.
 */
static VALUE
rlink_parseopts_get_short_length( VALUE self )
{
        Parse_Options opts = get_parseopts( self );
        int length = parse_options_get_short_length( opts );

        /* INT2NUM range-checks; INT2FIX would silently corrupt values that don't
           fit in a Fixnum (possible where int is as wide as VALUE). */
        return INT2NUM( length );
}
anchor
short_length= fixnum

The #short_length parameter determines how long the links are allowed to be. The intended use of this is to speed up parsing by not considering very long links for most connectors, since they are very rarely used in a correct parse. An entry for UNLIMITED-CONNECTORS in the dictionary will specify which connectors are exempt from the length limit.

/*
 * Setter for the short_length option. The argument is converted with
 * NUM2INT and passed to the library; the original argument is returned.
 */
static VALUE
rlink_parseopts_set_short_length( VALUE self, VALUE short_length )
{
        Parse_Options options = get_parseopts( self );
        int length = NUM2INT( short_length );

        parse_options_set_short_length( options, length );

        return short_length;
}
anchor
spell_guessing_enabled= boolean

Enable/disable spell-guessing if it's supported.

/*
 * Setter for spell guessing. When HAVE_PARSE_OPTIONS_GET_SPELL_GUESS is not
 * defined at compile time, calling this raises via rb_notimplement().
 * Otherwise the argument's truthiness is passed to the library and the
 * original argument is returned.
 */
static VALUE
rlink_parseopts_set_spell_guess( VALUE self, VALUE val )
{
#ifndef HAVE_PARSE_OPTIONS_GET_SPELL_GUESS
        rb_notimplement();
        return Qnil;
#else
        Parse_Options options = get_parseopts( self );
        int enabled = RTEST( val ) ? 1 : 0;

        parse_options_set_spell_guess( options, enabled );

        return val;
#endif /* HAVE_PARSE_OPTIONS_GET_SPELL_GUESS */
}
anchor
spell_guessing_enabled? → true or false

Returns true if spell-guessing is enabled. Note that a true return value doesn't mean that it's supported, only that it will be used if it is.

/*
 * Predicate for spell guessing. When HAVE_PARSE_OPTIONS_GET_SPELL_GUESS is
 * not defined at compile time, calling this raises via rb_notimplement().
 * Otherwise returns Qtrue when the library reports a non-zero value and
 * Qfalse when it reports zero.
 */
static VALUE
rlink_parseopts_get_spell_guess_p( VALUE self )
{
#ifndef HAVE_PARSE_OPTIONS_GET_SPELL_GUESS
        rb_notimplement();
        return Qnil;
#else
        Parse_Options options = get_parseopts( self );

        if ( parse_options_get_spell_guess(options) ) {
                return Qtrue;
        }

        return Qfalse;
#endif /* HAVE_PARSE_OPTIONS_GET_SPELL_GUESS */
}
anchor
timer_expired? → true or false

Returns true if timer constraints were exceeded during parsing.

sentence.parse
if sentence.options.timer_expired?
  $stderr.puts "Parsing sentence #{sentence} timed out."
end
/*
 * Predicate wrapping parse_options_timer_expired(): Qtrue when the library
 * reports a non-zero value, Qfalse otherwise.
 */
static VALUE
rlink_parseopts_timer_expired_p( VALUE self )
{
        Parse_Options options = get_parseopts( self );

        if ( parse_options_timer_expired(options) ) {
                return Qtrue;
        }

        return Qfalse;
}
anchor
to_hash()

Return the options as a Hash.

# File lib/linkparser/parseoptions.rb, line 45
# Return the options as a Hash keyed by option name (as listed by
# self.class.option_names). For each option the "<name>?" predicate is used
# when the receiver responds to it, otherwise the plain "<name>" reader.
#
# An option whose reader raises NotImplementedError is left out of the Hash
# instead of aborting the whole conversion. Without this, a single
# unsupported option would make #to_hash, and everything that relies on it,
# fail.
def to_hash
        return self.class.option_names.each_with_object( {} ) do |optname, accum|
                predicate = "#{optname}?"
                reader = self.respond_to?( predicate ) ? predicate : optname

                begin
                        accum[ optname ] = self.send( reader )
                rescue NotImplementedError
                        # Option unsupported in this build; omit it.
                end
        end
end
anchor
verbosity → fixnum

This gets the level of description printed to stderr/stdout about the parsing process.

/*
 * Return the verbosity option as a Ruby Integer.
 */
static VALUE
rlink_parseopts_get_verbosity( VALUE self )
{
        Parse_Options opts = get_parseopts( self );
        int level = parse_options_get_verbosity( opts );

        /* INT2NUM range-checks; INT2FIX would silently corrupt values that don't
           fit in a Fixnum (possible where int is as wide as VALUE). */
        return INT2NUM( level );
}
anchor
verbosity= fixnum

This sets the level of description printed to stderr/stdout about the parsing process.

/*
 * Setter for the verbosity option. The argument is converted with NUM2INT
 * and passed to the library; the original argument is returned.
 */
static VALUE
rlink_parseopts_set_verbosity( VALUE self, VALUE verbosity )
{
        Parse_Options options = get_parseopts( self );
        int level = NUM2INT( verbosity );

        parse_options_set_verbosity( options, level );

        return verbosity;
}