A Sentence is the API’s representation of an input string, tokenized and interpreted according to a specific Dictionary. After a Sentence is created and parsed, various attributes of the resulting set of linkages can be obtained.
Michael Granger <ged@FaerieMUD.org>
$Id: sentence.rb,v 23a39531870a 2011/01/11 18:18:12 ged $
Copyright (c) 2006-2011, The FaerieMUD Consortium All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the author/s, nor the names of the project's contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
A Sentence is the API’s representation of an input string, tokenized and interpreted according to a specific Dictionary. After a Sentence is created and parsed, various attributes of the resulting set of linkages can be obtained.
Create a new LinkParser::Sentence object from the given input string using the specified LinkParser::Dictionary.
dict = LinkParser::Dictionary.new
LinkParser::Sentence.new( "The boy runs", dict ) #=> #<LinkParser::Sentence:0x5481ac>
/*
 * Initializer for LinkParser::Sentence.
 *
 * Creates a native link-grammar sentence from +input_string+ using the
 * dictionary wrapped by +dictionary+, and attaches it to +self+.
 *
 * Raises RuntimeError if check_sentence() reports that +self+ has already
 * been set up, and raises via rlink_raise_lp_error() if sentence_create()
 * fails. Returns +self+.
 */
static VALUE
rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary ) {
    struct rlink_dictionary *dict_wrapper;
    struct rlink_sentence *wrapper;
    Sentence native;

    /* A sentence object may only be initialized once. */
    if ( check_sentence(self) ) {
        rb_raise( rb_eRuntimeError,
                  "Cannot re-initialize a sentence once it's been created." );
    }

    dict_wrapper = rlink_get_dict( dictionary );
    native = sentence_create( StringValueCStr(input_string), dict_wrapper->dict );
    if ( !native ) {
        rlink_raise_lp_error();
    }

    /* Attach a fresh wrapper struct to the Ruby object, then fill it in. */
    wrapper = rlink_sentence_alloc();
    DATA_PTR( self ) = wrapper;
    wrapper->sentence   = native;
    wrapper->dictionary = dictionary;
    wrapper->options    = Qnil;

    return self;
}
Element Reference—Returns the element at index, or returns a subarray starting at start and continuing for length elements, or returns a subarray specified by range. Negative indices count backward from the end of the array (-1 is the last element). Returns nil if the index (or starting index) is out of range.
sent = dict.parse( "Birds fly south for the winter." )
sent[1]    # => "birds"
sent[0,4]  # => ["LEFT-WALL", "birds", "fly", "south"]
sent[1..3] # => ["birds", "fly", "south"]
/*
 * Element reference: forwards the arguments unchanged to Array#[] on the
 * sentence's word list, so it accepts whatever Array#[] accepts.
 *
 * argc/argv - the Ruby arguments exactly as received.
 * self      - the receiving LinkParser::Sentence.
 *
 * Returns whatever Array#[] returns for the given arguments.
 *
 * Written as a prototype-style definition (instead of the old K&R
 * parameter-declaration form) to match the other functions in this file;
 * old-style definitions are no longer valid as of C23.
 */
static VALUE
rlink_sentence_aref( int argc, VALUE *argv, VALUE self ) {
    VALUE words = rlink_sentence_words( self );

    return rb_funcall2( words, rb_intern("[]"), argc, argv );
}
The maximum cost of connectors used in the i-th linkage of the sentence.
/*
 * Return the value reported by sentence_disjunct_cost() for the +i+-th
 * linkage of the sentence, parsing the sentence first if it has not been
 * parsed yet.
 *
 * i - the linkage index, as a Ruby Integer.
 *
 * The index is converted with NUM2INT rather than FIX2INT: FIX2INT assumes
 * its argument is already a Fixnum, so a non-Integer argument would be
 * turned into a meaningless index and handed to the C library. NUM2INT
 * type-checks the argument and raises a Ruby exception instead.
 */
static VALUE
rlink_sentence_disjunct_cost( VALUE self, VALUE i ) {
    struct rlink_sentence *ptr = get_sentence( self );
    int linkage_index;
    int cost;

    if ( !RTEST(ptr->parsed_p) ) {
        rlink_sentence_parse( 0, 0, self );
    }

    linkage_index = NUM2INT( i );
    cost = sentence_disjunct_cost( (Sentence)ptr->sentence, linkage_index );

    return INT2FIX( cost );
}
Return a human-readable representation of the Sentence object.
# File lib/linkparser/sentence.rb, line 32
# Return a human-readable representation of the Sentence object. A parsed
# sentence shows its text, linkage count and null count; an unparsed one
# shows "(unparsed)".
def inspect
  summary =
    if self.parsed?
      %Q{"%s"/%d linkages/%d nulls} % [
        self.to_s,
        self.num_linkages_found,
        self.null_count,
      ]
    else
      "(unparsed)"
    end

  return "#<%s:0x%x %s>" % [
    self.class.name,
    self.object_id / 2,
    summary,
  ]
end
Returns the number of words in the tokenized sentence, including the boundary words and punctuation.
/*
 * Return the result of sentence_length() for the wrapped sentence as a
 * Ruby Integer. The sentence is parsed first if it has not been already.
 */
static VALUE
rlink_sentence_length( VALUE self ) {
    struct rlink_sentence *wrapper = get_sentence( self );
    int word_count;

    /* Parse lazily on first use. */
    if ( !RTEST(wrapper->parsed_p) ) {
        rlink_sentence_parse( 0, 0, self );
    }

    word_count = sentence_length( (Sentence)wrapper->sentence );
    return INT2FIX( word_count );
}
Returns an Array of LinkParser::Linkage objects, one for each valid linkage found when parsing the sentence. This will cause the sentence to be parsed if it hasn’t been already.
/*
 * Build and return a Ruby Array holding one LinkParser::Linkage instance
 * per valid linkage of the sentence. The sentence is parsed first if it
 * has not been already.
 *
 * Each Linkage is constructed with two arguments: its index and this
 * sentence.
 */
static VALUE
rlink_sentence_linkages( VALUE self ) {
    struct rlink_sentence *wrapper = get_sentence( self );
    VALUE linkages;
    int total = 0;
    int idx;

    if ( !RTEST(wrapper->parsed_p) ) {
        rlink_sentence_parse( 0, 0, self );
    }

    total = sentence_num_valid_linkages( (Sentence)wrapper->sentence );
    linkages = rb_ary_new2( total );

    for ( idx = 0; idx < total; idx++ ) {
        VALUE ctor_args[2];

        ctor_args[0] = INT2FIX( idx );
        ctor_args[1] = self;

        rb_ary_store( linkages, idx,
                      rb_class_new_instance( 2, ctor_args, rlink_cLinkage ) );
    }

    return linkages;
}
Returns the number of null links that were used in parsing the sentence.
/*
 * Return the result of sentence_null_count() for the wrapped sentence as a
 * Ruby Integer. The sentence is parsed first if it has not been already.
 */
static VALUE
rlink_sentence_null_count( VALUE self ) {
    struct rlink_sentence *wrapper = get_sentence( self );

    /* Parse lazily on first use. */
    if ( !RTEST(wrapper->parsed_p) ) {
        rlink_sentence_parse( 0, 0, self );
    }

    return INT2FIX( sentence_null_count((Sentence)wrapper->sentence) );
}
Returns the number of linkages found when parsing the sentence. This will cause the sentence to be parsed if it hasn’t been already.
/*
 * Return the result of sentence_num_linkages_found() for the wrapped
 * sentence as a Ruby Integer. The sentence is parsed first if it has not
 * been already.
 */
static VALUE
rlink_sentence_num_linkages_found( VALUE self ) {
    struct rlink_sentence *wrapper = get_sentence( self );

    /* Parse lazily on first use. */
    if ( !RTEST(wrapper->parsed_p) ) {
        rlink_sentence_parse( 0, 0, self );
    }

    return INT2FIX( sentence_num_linkages_found((Sentence)wrapper->sentence) );
}
Return the number of linkages that were actually post-processed (which may be less than the number found because of the linkage_limit parameter).
/*
 * Return the result of sentence_num_linkages_post_processed() for the
 * wrapped sentence as a Ruby Integer. The sentence is parsed first if it
 * has not been already.
 */
static VALUE
rlink_sentence_num_linkages_post_processed( VALUE self ) {
    struct rlink_sentence *wrapper = get_sentence( self );

    /* Parse lazily on first use. */
    if ( !RTEST(wrapper->parsed_p) ) {
        rlink_sentence_parse( 0, 0, self );
    }

    return INT2FIX(
        sentence_num_linkages_post_processed((Sentence)wrapper->sentence) );
}
Return the number of linkages that had no post-processing violations.
/*
 * Return the result of sentence_num_valid_linkages() for the wrapped
 * sentence as a Ruby Integer. The sentence is parsed first if it has not
 * been already.
 */
static VALUE
rlink_sentence_num_valid_linkages( VALUE self ) {
    struct rlink_sentence *wrapper = get_sentence( self );

    /* Parse lazily on first use. */
    if ( !RTEST(wrapper->parsed_p) ) {
        rlink_sentence_parse( 0, 0, self );
    }

    return INT2FIX( sentence_num_valid_linkages((Sentence)wrapper->sentence) );
}
The number of post-processing violations that the i-th linkage had during the last parse.
/*
 * Return the value reported by sentence_num_violations() for the +i+-th
 * linkage of the sentence, parsing the sentence first if it has not been
 * parsed yet.
 *
 * i - the linkage index, as a Ruby Integer.
 *
 * The index is converted with NUM2INT rather than FIX2INT: FIX2INT assumes
 * its argument is already a Fixnum, so a non-Integer argument would be
 * turned into a meaningless index and handed to the C library. NUM2INT
 * type-checks the argument and raises a Ruby exception instead.
 */
static VALUE
rlink_sentence_num_violations( VALUE self, VALUE i ) {
    struct rlink_sentence *ptr = get_sentence( self );
    int linkage_index;
    int count;

    if ( !RTEST(ptr->parsed_p) ) {
        rlink_sentence_parse( 0, 0, self );
    }

    linkage_index = NUM2INT( i );
    count = sentence_num_violations( (Sentence)ptr->sentence, linkage_index );

    return INT2FIX( count );
}
Returns a ParseOptions object for the receiving sentence.
sentence.options.verbosity = 3 sentence.options.islands_ok? # -> true
/*
 * Return the options object stored on the sentence wrapper. This is the
 * value recorded by the most recent parse; the initializer sets it to nil.
 */
static VALUE
rlink_sentence_options( VALUE self ) {
    return get_sentence( self )->options;
}
Attach a parse set to this sentence and return the number of linkages
found. If any options are specified, they override those set
in the sentence’s dictionary.
/*
 * Parse the sentence and return the number of linkages reported by
 * sentence_parse().
 *
 * Accepts at most one optional Ruby argument (scan spec "01"), which is
 * combined with the dictionary's own options by rlink_make_parse_options().
 * On success the combined options object is stored on the sentence and the
 * sentence is flagged as parsed.
 *
 * Raises via rlink_raise_lp_error() if sentence_parse() returns a negative
 * value.
 *
 * Note: an already-parsed sentence may be parsed again; no guard rejects a
 * re-parse.
 */
static VALUE
rlink_sentence_parse( int argc, VALUE *argv, VALUE self ) {
    struct rlink_sentence *wrapper = get_sentence( self );
    VALUE call_opts = Qnil;
    VALUE dict_opts = Qnil;
    VALUE merged_opts = Qnil;
    Parse_Options native_opts;
    int linkages_found = 0;

    debugMsg(( "Parsing sentence <%p>", wrapper ));

    /* Collect the per-call options (if any) and the dictionary defaults. */
    rb_scan_args( argc, argv, "01", &call_opts );
    dict_opts = rb_funcall( wrapper->dictionary, rb_intern("options"), 0 );

    /* Combine both into a ParseOptions object, then pull the native
       Parse_Options struct out of it. */
    merged_opts = rlink_make_parse_options( dict_opts, call_opts );
    native_opts = rlink_get_parseopts( merged_opts );

    /* Run the parser. */
    linkages_found = sentence_parse( wrapper->sentence, native_opts );
    if ( linkages_found < 0 ) {
        rlink_raise_lp_error();
    }

    wrapper->options = merged_opts;
    wrapper->parsed_p = Qtrue;

    return INT2FIX( linkages_found );
}
Returns true if the sentence has been parsed.
sentence.parsed? #-> false
sentence.parse   #-> 6
sentence.parsed? #-> true
/*
 * Return the sentence's parsed flag, which rlink_sentence_parse() sets to
 * true after a successful parse.
 */
static VALUE
rlink_sentence_parsed_p( VALUE self ) {
    return get_sentence( self )->parsed_p;
}
Return the sentence as a String: its tokenized words joined with single spaces.
# File lib/linkparser/sentence.rb, line 53
# Return the sentence as a String: its words joined with single spaces.
def to_s
  tokens = self.words
  return tokens.join(" ")
end
Returns the spelling of the n-th word in the sentence as it appears after tokenization.
/*
 * Return the +n+-th word of the sentence, as reported by
 * sentence_get_word(), as a new Ruby String. The sentence is parsed first
 * if it has not been already.
 *
 * n - the word index, as a Ruby Integer.
 *
 * The index is converted with NUM2INT rather than FIX2INT: FIX2INT assumes
 * its argument is already a Fixnum, so a non-Integer argument would be
 * turned into a meaningless index and handed to the C library. NUM2INT
 * type-checks the argument and raises a Ruby exception instead.
 *
 * NOTE(review): +n+ is still not range-checked against the sentence
 * length here; out-of-range handling is left to sentence_get_word() --
 * confirm that is safe for the link-grammar version in use.
 */
static VALUE
rlink_sentence_word( VALUE self, VALUE n ) {
    struct rlink_sentence *ptr = get_sentence( self );
    const char *word;
    int word_index;

    if ( !RTEST(ptr->parsed_p) ) {
        rlink_sentence_parse( 0, 0, self );
    }

    word_index = NUM2INT( n );
    word = sentence_get_word( (Sentence)ptr->sentence, word_index );

    return rb_str_new2( word );
}
Returns the words of the sentence as they appear after tokenization.
sentence = LinkParser::Dictionary.new.parse( "The dogs barks." ) sentence.words #->
/*
 * Return a Ruby Array of Strings, one per word of the sentence, in order,
 * as reported by sentence_get_word(). The sentence is parsed first if it
 * has not been already.
 */
static VALUE
rlink_sentence_words( VALUE self ) {
    struct rlink_sentence *wrapper = get_sentence( self );
    VALUE result;
    int total;
    int idx;

    if ( !RTEST(wrapper->parsed_p) ) {
        rlink_sentence_parse( 0, 0, self );
    }

    result = rb_ary_new();
    total = sentence_length( (Sentence)wrapper->sentence );

    for ( idx = 0; idx < total; idx++ ) {
        const char *token = sentence_get_word( (Sentence)wrapper->sentence, idx );

        debugMsg(( "Word %d: <%s>", idx, token ));
        rb_ary_push( result, rb_str_new2(token) );
    }

    return result;
}
Proxy method – auto-delegate calls to the first linkage.
# File lib/linkparser/sentence.rb, line 69
# Proxy method -- delegate calls that LinkParser::Linkage can answer to the
# sentence's first linkage. The delegated method is also installed on this
# object's singleton class, so later calls skip method_missing.
#
# Raises LinkParser::Error if the sentence has no linkages. Any error raised
# here is re-raised with the last backtrace entry removed.
def method_missing( sym, *args )
  # Check both symbol and string for forward-compatibility with 1.9.x
  linkage_methods = LinkParser::Linkage.instance_methods
  return super unless linkage_methods.include?( sym.to_s ) ||
    linkage_methods.include?( sym )

  first_linkage = self.linkages.first
  raise LinkParser::Error, "sentence has no linkages" unless first_linkage

  delegate = first_linkage.method( sym )
  self.singleton_class.send( :define_method, sym, &delegate )

  delegate.call( *args )
rescue => err
  raise err, err.message, err.backtrace[ 0..-2 ]
end
Return the singleton class for this object
# File lib/linkparser/sentence.rb, line 63
# Return the singleton class for this object.
def singleton_class
  class << self
    self
  end
end