class LinkParser::Sentence

A Sentence is the API’s representation of an input string, tokenized and interpreted according to a specific Dictionary. After a Sentence is created and parsed, various attributes of the resulting set of linkages can be obtained.

Authors

Version

$Id: sentence.rb,v 23a39531870a 2011/01/11 18:18:12 ged $

License

Copyright (c) 2006-2011, The FaerieMUD Consortium
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
  this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the author/s, nor the names of the project's
  contributors may be used to endorse or promote products derived from this
  software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

A Sentence is the API’s representation of an input string, tokenized and interpreted according to a specific Dictionary. After a Sentence is created and parsed, various attributes of the resulting set of linkages can be obtained.

Public Class Methods

LinkParser::Sentence.new( str, dict ) → sentence click to toggle source
Create a new LinkParser::Sentence object from the given input string using the specified LinkParser::Dictionary.

dict = LinkParser::Dictionary.new
LinkParser::Sentence.new( "The boy runs", dict )  #=> #<LinkParser::Sentence:0x5481ac>
/*
 * Initializer for LinkParser::Sentence: creates a link-grammar Sentence from
 * +input_string+ using the dictionary obtained from +dictionary+ via
 * rlink_get_dict(), and attaches freshly-allocated sentence data to +self+.
 *
 * Raises a RuntimeError if check_sentence() reports that +self+ has already
 * been set up, and calls rlink_raise_lp_error() if sentence_create() fails.
 *
 * Returns +self+.
 */
static VALUE
rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary ) {
        struct rlink_dictionary *dict_data;
        struct rlink_sentence *sent_data;
        Sentence new_sentence;

        /* A sentence that already carries its data can't be set up twice. */
        if ( check_sentence(self) ) {
                rb_raise( rb_eRuntimeError,
                                  "Cannot re-initialize a sentence once it's been created." );
        }

        dict_data = rlink_get_dict( dictionary );

        new_sentence = sentence_create( StringValueCStr(input_string), dict_data->dict );
        if ( !new_sentence )
                rlink_raise_lp_error();

        sent_data = rlink_sentence_alloc();
        DATA_PTR( self ) = sent_data;

        /* Keep a reference to the dictionary object alongside the sentence;
           no parse options exist until the sentence is parsed. */
        sent_data->sentence = new_sentence;
        sent_data->dictionary = dictionary;
        sent_data->options = Qnil;

        return self;
}

Public Instance Methods

sentence[index] → str click to toggle source
sentence[start, length] → str
sentence[range] → str

Element Reference—Returns the word at index, or returns an array of words starting at start and continuing for length elements, or returns an array of words specified by range. Negative indices count backward from the end of the sentence's words (-1 is the last word). Returns nil if the index (or starting index) is out of range.

sent = dict.parse( "Birds fly south for the winter." )

sent[1]          # => "birds"
sent[0,4]        # => ["LEFT-WALL", "birds", "fly", "south"]
sent[1..3]       # => ["birds", "fly", "south"]
/*
 * Element reference: forwards the +argc+ arguments in +argv+ unchanged to
 * the #[] method of the Array returned by rlink_sentence_words(), and
 * returns whatever that call returns.
 *
 * Written as a prototype-style definition (rather than an old-style
 * identifier-list definition) to match the other functions in this file.
 */
static VALUE
rlink_sentence_aref( int argc, VALUE *argv, VALUE self ) {
        VALUE words = rlink_sentence_words( self );
        return rb_funcall2( words, rb_intern("[]"), argc, argv );
}
disjunct_cost( i ) → fixnum click to toggle source

The maximum cost of connectors used in the i-th linkage of the sentence.

/*
 * Returns, as a Fixnum, the value sentence_disjunct_cost() reports for the
 * linkage at index +i+. The sentence is parsed first (with no explicit
 * options) if it hasn't been parsed yet.
 *
 * +i+ is converted with NUM2INT so that a non-Integer argument raises a
 * TypeError instead of having its raw VALUE bits reinterpreted as a Fixnum.
 * The conversion happens before any parsing so a bad argument fails fast.
 */
static VALUE
rlink_sentence_disjunct_cost( VALUE self, VALUE i ) {
        struct rlink_sentence *ptr = get_sentence( self );
        int linkage_idx = NUM2INT( i );
        int cost;

        if ( !RTEST(ptr->parsed_p) )
                rlink_sentence_parse( 0, 0, self );

        cost = sentence_disjunct_cost( (Sentence)ptr->sentence, linkage_idx );
        return INT2FIX( cost );
}
inspect() click to toggle source

Return a human-readable representation of the Sentence object.

# File lib/linkparser/sentence.rb, line 32
# Return a human-readable representation of the Sentence object: the class
# name, half the object id in hex, and either "(unparsed)" or the quoted
# sentence text followed by its linkage and null counts.
def inspect
        summary =
                if parsed?
                        format( %Q{"%s"/%d linkages/%d nulls}, to_s, num_linkages_found, null_count )
                else
                        "(unparsed)"
                end

        format( "#<%s:0x%x %s>", self.class.name, object_id / 2, summary )
end
length → fixnum click to toggle source

Returns the number of words in the tokenized sentence, including the boundary words and punctuation.

/*
 * Returns, as a Fixnum, the value sentence_length() reports for the
 * underlying sentence. The sentence is parsed first (with no explicit
 * options) if it hasn't been parsed yet.
 */
static VALUE
rlink_sentence_length( VALUE self ) {
        struct rlink_sentence *sent_data = get_sentence( self );
        int word_count;

        /* Parse on demand. */
        if ( !RTEST(sent_data->parsed_p) ) {
                rlink_sentence_parse( 0, 0, self );
        }

        word_count = sentence_length( (Sentence)sent_data->sentence );
        return INT2FIX( word_count );
}
linkages → array click to toggle source

Returns an Array of LinkParser::Linkage objects, one for each valid linkage found when parsing the sentence. This will cause the sentence to be parsed if it hasn’t been already.

/*
 * Returns a new Array holding one LinkParser::Linkage per valid linkage of
 * the sentence, i.e. one for every index from 0 up to (but not including)
 * the count reported by sentence_num_valid_linkages(). Each Linkage is
 * constructed with its index and +self+ as arguments.
 *
 * The sentence is parsed first (with no explicit options) if it hasn't been
 * parsed yet.
 */
static VALUE
rlink_sentence_linkages( VALUE self ) {
        struct rlink_sentence *sent_data = get_sentence( self );
        VALUE linkages;
        int idx;
        int valid_count = 0;

        /* Parse on demand. */
        if ( !RTEST(sent_data->parsed_p) ) {
                rlink_sentence_parse( 0, 0, self );
        }

        valid_count = sentence_num_valid_linkages( (Sentence)sent_data->sentence );
        linkages = rb_ary_new2( valid_count );

        for ( idx = 0; idx < valid_count; idx++ ) {
                VALUE ctor_args[2];

                /* Linkage.new( index, sentence ) */
                ctor_args[0] = INT2FIX( idx );
                ctor_args[1] = self;

                rb_ary_store( linkages, idx,
                              rb_class_new_instance(2, ctor_args, rlink_cLinkage) );
        }

        return linkages;
}
null_count → int click to toggle source

Returns the number of null links that were used in parsing the sentence.

/*
 * Returns, as a Fixnum, the value sentence_null_count() reports for the
 * underlying sentence. The sentence is parsed first (with no explicit
 * options) if it hasn't been parsed yet.
 */
static VALUE
rlink_sentence_null_count( VALUE self ) {
        struct rlink_sentence *sent_data = get_sentence( self );

        /* Parse on demand. */
        if ( !RTEST(sent_data->parsed_p) ) {
                rlink_sentence_parse( 0, 0, self );
        }

        return INT2FIX( sentence_null_count((Sentence)sent_data->sentence) );
}
num_linkages_found → fixnum click to toggle source

Returns the number of linkages found when parsing the sentence. This will cause the sentence to be parsed if it hasn’t been already.

/*
 * Returns, as a Fixnum, the value sentence_num_linkages_found() reports for
 * the underlying sentence. The sentence is parsed first (with no explicit
 * options) if it hasn't been parsed yet.
 */
static VALUE
rlink_sentence_num_linkages_found( VALUE self ) {
        struct rlink_sentence *sent_data = get_sentence( self );
        int found;

        /* Parse on demand. */
        if ( !RTEST(sent_data->parsed_p) ) {
                rlink_sentence_parse( 0, 0, self );
        }

        found = sentence_num_linkages_found( (Sentence)sent_data->sentence );
        return INT2FIX( found );
}
num_linkages_post_processed → fixnum click to toggle source

Return the number of linkages that were actually post-processed (which may be less than the number found because of the linkage_limit parameter).

/*
 * Returns, as a Fixnum, the value sentence_num_linkages_post_processed()
 * reports for the underlying sentence. The sentence is parsed first (with no
 * explicit options) if it hasn't been parsed yet.
 */
static VALUE
rlink_sentence_num_linkages_post_processed( VALUE self ) {
        struct rlink_sentence *sent_data = get_sentence( self );

        /* Parse on demand. */
        if ( !RTEST(sent_data->parsed_p) ) {
                rlink_sentence_parse( 0, 0, self );
        }

        return INT2FIX(
                sentence_num_linkages_post_processed((Sentence)sent_data->sentence) );
}
num_valid_linkages → fixnum click to toggle source

Return the number of linkages that had no post-processing violations.

/*
 * Returns, as a Fixnum, the value sentence_num_valid_linkages() reports for
 * the underlying sentence. The sentence is parsed first (with no explicit
 * options) if it hasn't been parsed yet.
 */
static VALUE
rlink_sentence_num_valid_linkages( VALUE self ) {
        struct rlink_sentence *sent_data = get_sentence( self );
        int valid;

        /* Parse on demand. */
        if ( !RTEST(sent_data->parsed_p) ) {
                rlink_sentence_parse( 0, 0, self );
        }

        valid = sentence_num_valid_linkages( (Sentence)sent_data->sentence );
        return INT2FIX( valid );
}
num_violations( i ) → fixnum click to toggle source

The number of post-processing violations that the i-th linkage had during the last parse.

/*
 * Returns, as a Fixnum, the value sentence_num_violations() reports for the
 * linkage at index +i+. The sentence is parsed first (with no explicit
 * options) if it hasn't been parsed yet.
 *
 * +i+ is converted with NUM2INT so that a non-Integer argument raises a
 * TypeError instead of having its raw VALUE bits reinterpreted as a Fixnum.
 * The conversion happens before any parsing so a bad argument fails fast.
 */
static VALUE
rlink_sentence_num_violations( VALUE self, VALUE i ) {
        struct rlink_sentence *ptr = get_sentence( self );
        int linkage_idx = NUM2INT( i );
        int count;

        if ( !RTEST(ptr->parsed_p) )
                rlink_sentence_parse( 0, 0, self );

        count = sentence_num_violations( (Sentence)ptr->sentence, linkage_idx );
        return INT2FIX( count );
}
options → parseoptions click to toggle source

Returns a ParseOptions object for the receiving sentence.

sentence.options.verbosity = 3
sentence.options.islands_ok?  # -> true
/*
 * Returns whatever is currently stored in the +options+ member of the
 * sentence data attached to +self+, without triggering a parse.
 */
static VALUE
rlink_sentence_options( VALUE self ) {
        return get_sentence( self )->options;
}
parse( options={} ) → fixnum click to toggle source

Attach a parse set to this sentence and return the number of linkages found. If any options are specified, they override those set in the sentence’s dictionary.

/*
 * Parses the sentence and returns the count from sentence_parse() as a
 * Fixnum.
 *
 * Accepts zero or one argument (the per-call options). That argument is
 * combined with the result of calling #options on the sentence's dictionary
 * by rlink_make_parse_options(); the resulting object is what gets stored as
 * the sentence's options once the parse succeeds.
 *
 * Calls rlink_raise_lp_error() if sentence_parse() returns a negative value.
 * No check is made for an earlier parse, so calling this again parses the
 * sentence again.
 */
static VALUE
rlink_sentence_parse( int argc, VALUE *argv, VALUE self ) {
        struct rlink_sentence *sent_data = get_sentence( self );
        VALUE call_opts = Qnil;
        VALUE dict_opts = Qnil;
        VALUE merged_opts = Qnil;
        Parse_Options lp_opts;
        int linkages_found = 0;

        debugMsg(( "Parsing sentence <%p>", sent_data ));

        /* Optional per-call options, plus the dictionary's own options. */
        rb_scan_args( argc, argv, "01", &call_opts );
        dict_opts = rb_funcall( sent_data->dictionary, rb_intern("options"), 0 );

        /* Build the options object from both, then pull the Parse_Options
           handle out of it for the link-grammar call. */
        merged_opts = rlink_make_parse_options( dict_opts, call_opts );
        lp_opts = rlink_get_parseopts( merged_opts );

        linkages_found = sentence_parse( sent_data->sentence, lp_opts );
        if ( linkages_found < 0 )
                rlink_raise_lp_error();

        /* Only record the options and the parsed flag after a successful parse. */
        sent_data->options = merged_opts;
        sent_data->parsed_p = Qtrue;

        return INT2FIX( linkages_found );
}
parsed? → true or false click to toggle source

Returns true if the sentence has been parsed.

sentence.parsed?   #-> false
sentence.parse     #-> 6
sentence.parsed?   #-> true
/*
 * Returns whatever is currently stored in the +parsed_p+ member of the
 * sentence data attached to +self+, without triggering a parse.
 */
static VALUE
rlink_sentence_parsed_p( VALUE self ) {
        return get_sentence( self )->parsed_p;
}
to_s() click to toggle source

Return the sentence as a String: its tokenized words joined with single spaces.

# File lib/linkparser/sentence.rb, line 53
# Return the sentence as a String: the elements of #words joined with a
# single space between them.
def to_s
        words.join( ' ' )
end
word( idx ) → str click to toggle source

Returns the spelling of the word at index idx in the sentence as it appears after tokenization.

/*
 * Returns a new String built from the C string sentence_get_word() returns
 * for index +n+. The sentence is parsed first (with no explicit options) if
 * it hasn't been parsed yet.
 *
 * +n+ is converted with NUM2INT so that a non-Integer argument raises a
 * TypeError instead of having its raw VALUE bits reinterpreted as a Fixnum.
 * The conversion happens before any parsing so a bad argument fails fast.
 */
static VALUE
rlink_sentence_word( VALUE self, VALUE n ) {
        struct rlink_sentence *ptr = get_sentence( self );
        int word_idx = NUM2INT( n );
        const char *word;

        if ( !RTEST(ptr->parsed_p) )
                rlink_sentence_parse( 0, 0, self );

        word = sentence_get_word( (Sentence)ptr->sentence, word_idx );
        return rb_str_new2( word );
}
words → array click to toggle source

Returns the words of the sentence as they appear after tokenization.

sentence = LinkParser::Dictionary.new.parse( "The dogs barks." )
sentence.words  #->
/*
 * Returns a new Array of Strings, one per index from 0 up to (but not
 * including) sentence_length(), each built from the C string that
 * sentence_get_word() returns for that index.
 *
 * The sentence is parsed first (with no explicit options) if it hasn't been
 * parsed yet.
 */
static VALUE
rlink_sentence_words( VALUE self ) {
        struct rlink_sentence *sent_data = get_sentence( self );
        VALUE result = rb_ary_new();
        int idx, word_count;

        /* Parse on demand. */
        if ( !RTEST(sent_data->parsed_p) ) {
                rlink_sentence_parse( 0, 0, self );
        }

        word_count = sentence_length( (Sentence)sent_data->sentence );

        for ( idx = 0; idx < word_count; idx++ ) {
                const char *spelling = sentence_get_word( (Sentence)sent_data->sentence, idx );

                debugMsg(( "Word %d: <%s>", idx, spelling ));
                rb_ary_push( result, rb_str_new2(spelling) );
        }

        return result;
}

Protected Instance Methods

method_missing( sym, *args ) click to toggle source

Proxy method – auto-delegate calls to the first linkage.

# File lib/linkparser/sentence.rb, line 69
# Proxy method -- auto-delegate calls to the first linkage.
#
# If LinkParser::Linkage defines a public or protected instance method named
# +sym+, look that method up on the first of this sentence's linkages, define
# it as a singleton method on the receiver so later calls skip
# method_missing, and invoke it with +args+ and the caller's block (if one
# was given). Any other message is passed to +super+.
#
# Raises LinkParser::Error if the sentence has no linkages.
def method_missing( sym, *args, &block )

        # Module#method_defined? takes the Symbol directly, so there is no need
        # to build the instance_methods list and search it for both the String
        # and the Symbol spelling.
        return super unless LinkParser::Linkage.method_defined?( sym )

        linkage = self.linkages.first or raise LinkParser::Error, "sentence has no linkages"

        # The installed singleton method stays bound to this particular
        # linkage object.
        meth = linkage.method( sym )
        self.singleton_class.send( :define_method, sym, &meth )

        # Forward the block too, so delegated iterators work on the first call.
        meth.call( *args, &block )
rescue => err
        # Re-raise the same error with the final entry of its backtrace removed.
        raise err, err.message, err.backtrace[ 0..-2 ]
end
singleton_class() click to toggle source

Return the singleton class for this object

# File lib/linkparser/sentence.rb, line 63
# Return the singleton class for this object, i.e. the value of +self+
# inside a <tt>class << self</tt> body opened on the receiver.
def singleton_class
        class << self
                self
        end
end