Additional high-level functionality for LinkParser::Linkage objects.
Michael Granger <ged@FaerieMUD.org>
$Id: linkage.rb,v 507ef20fc315 2011/01/11 19:06:53 ged $
Copyright © 2006-2011, The FaerieMUD Consortium All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Neither the name of the author/s, nor the names of the project’s contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
end)
Descriptions of the linkage types, keyed by linkage symbol
Link struct (:lword, :rword, :length, :label, :llabel, :rlabel, :desc)
Create a new LinkParser::Linkage object out of
the linkage indicated by index (a zero-based, non-negative Integer) from the
specified sentence (a LinkParser::Sentence).
The optional options hash can be used to override the parse options of the
Sentence for the new linkage.
/*
 * Initialize a Linkage from one of the linkages of a parsed sentence.
 *
 * Ruby-level arguments (passed via argc/argv):
 *   index    - zero-based index of the linkage to create
 *   sentence - the LinkParser::Sentence the linkage belongs to
 *   options  - optional Hash of parse options
 *
 * Raises RuntimeError if the object has already been initialized, raises
 * rlink_eLpError if the index is negative or greater than the highest valid
 * linkage index, and calls rlink_raise_lp_error() if linkage_create() returns
 * NULL. Always returns nil.
 */
static VALUE
rlink_linkage_init( int argc, VALUE *argv, VALUE self )
{
	int link_index, max_index;
	VALUE index, sentence, options, defopts;
	struct rlink_sentence *sent_ptr;
	struct rlink_linkage *ptr;
	Linkage linkage;
	Parse_Options opts;

	/* Refuse to initialize the same object twice. */
	if ( check_linkage(self) ) {
		rb_raise( rb_eRuntimeError,
			"Cannot re-initialize a linkage once it's been created." );
	}

	rb_scan_args( argc, argv, "21", &index, &sentence, &options );

	/* The defaults start out as an empty Hash; the sentence's own options
	   are not consulted here. */
	defopts = rb_hash_new();
	options = rlink_make_parse_options( defopts, options );
	opts = rlink_get_parseopts( options );

	sent_ptr = (struct rlink_sentence *)rlink_get_sentence( sentence );

	/* Valid indexes run from 0 to (number of valid linkages - 1); reject
	   anything outside that range before handing it to the library. */
	link_index = NUM2INT( index );
	max_index = sentence_num_valid_linkages( (Sentence)sent_ptr->sentence ) - 1;
	if ( link_index < 0 || link_index > max_index ) {
		rb_raise( rlink_eLpError, "Invalid linkage %d (max is %d)",
			link_index, max_index );
	}

	linkage = linkage_create( link_index, (Sentence)sent_ptr->sentence, opts );
	if ( !linkage ) rlink_raise_lp_error();

	DATA_PTR( self ) = ptr = rlink_linkage_alloc();

	ptr->linkage = linkage;
	ptr->sentence = sentence;

	return Qnil;
}
Returns the AND cost of the linkage, which is the difference in length between and-list elements.
/*
 * Return the value of linkage_and_cost() for the wrapped linkage as a
 * Fixnum.
 */
static VALUE
rlink_linkage_and_cost( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	Linkage lkg = (Linkage)wrapper->linkage;

	return INT2FIX( linkage_and_cost(lkg) );
}
Returns true if the linkage is canonical. The canonical
linkage is the one in which the minimal disjunct that ever occurs in a
position is used in that position.
/*
 * Return true if linkage_is_canonical() reports a non-zero value for the
 * wrapped linkage, false otherwise.
 */
static VALUE
rlink_linkage_canonical_p( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );

	if ( linkage_is_canonical((Linkage)wrapper->linkage) ) {
		return Qtrue;
	}

	return Qfalse;
}
If the linkage has a conjunction, combine all of the links occurring in all sublinkages together – in effect creating a “master” linkage (which may have crossing links). The union is created as another sublinkage, thus increasing the number of sublinkages by one. If the linkage has no conjunctions, computing its union has no effect. This method returns true if computing the union caused another sublinkage to be created, and false otherwise.
/*
 * Compute the union of the linkage's sublinkages and report whether doing so
 * added a sublinkage: the sublinkage count is sampled before and after the
 * call to linkage_compute_union(), and true is returned only if it grew.
 */
static VALUE
rlink_linkage_compute_union( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	Linkage lkg = (Linkage)wrapper->linkage;
	int count_before = linkage_get_num_sublinkages( lkg );
	int count_after;

	linkage_compute_union( lkg );
	count_after = linkage_get_num_sublinkages( lkg );

	if ( count_after > count_before ) {
		return Qtrue;
	}

	return Qfalse;
}
Return the Linkage’s constituent tree as an Array of hierarchical “CTree” structs.
sent = dict.parse( "He is a big dog." ) link = sent.linkages.first ctree = link.constituent_tree # => [#<struct Struct::LinkParserLinkageCTree label="S", children=[#<struct Struct::LinkParserLinkageCTree label="NP">, ...], start=0, end=5>]
/*
 * Build the Ruby representation of the linkage's constituent tree.
 *
 * The native tree obtained from linkage_constituent_tree() is converted with
 * rlink_linkage_make_cnode_array() and then released with
 * linkage_free_constituent_tree() before the converted value is returned.
 */
static VALUE
rlink_linkage_constituent_tree( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	CNode *root = linkage_constituent_tree( (Linkage)wrapper->linkage );
	VALUE nodes = rlink_linkage_make_cnode_array( root );

	linkage_free_constituent_tree( root );

	return nodes;
}
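Return the constituent tree as a printable string. An optional mode argument (an Integer from 1 to 3; the default is 1) is passed through to the parser to select the output format; any other value raises an ArgumentError.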
Example:
sent = dict.parse( "He is a big dog." ) link = sent.linkages.first link.constituent_tree_string # ==> "(S (NP He)\n (VP is\n (NP a big dog))\n .)\n"
/*
 * Return the constituent tree of the linkage as a String, or nil if the
 * library returns no string.
 *
 * Takes one optional Ruby argument, the display mode, which defaults to 1.
 * Raises ArgumentError unless the mode is between 1 and 3 inclusive.
 */
static VALUE
rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	VALUE mode_arg = Qnil;
	VALUE result = Qnil;
	char *tree_text = NULL;
	int mode = 1;

	/* Only override the default when a mode was actually supplied. */
	if ( rb_scan_args(argc, argv, "01", &mode_arg) == 1 ) {
		mode = NUM2INT( mode_arg );
	}

	if ( mode < 1 || mode > 3 ) {
		rb_raise( rb_eArgError, "Illegal mode %d specified.", mode );
	}

	tree_text = linkage_print_constituent_tree( (Linkage)wrapper->linkage, mode );
	if ( !tree_text ) {
		return Qnil;
	}

	/* Copy into a Ruby String before releasing the library's buffer. */
	result = rb_str_new2( tree_text );
	linkage_free_constituent_tree_str( tree_text );

	return result;
}
Get the index of the current sublinkage.
/*
 * Return the result of linkage_get_current_sublinkage() for the wrapped
 * linkage as a Fixnum.
 */
static VALUE
rlink_linkage_current_sublinkage( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	Linkage lkg = (Linkage)wrapper->linkage;

	return INT2FIX( linkage_get_current_sublinkage(lkg) );
}
After this call, all operations on the linkage will refer to the index-th sublinkage. In the case of a linkage without conjunctions, this has no effect.
/*
 * Select the sublinkage at +index+ via linkage_set_current_sublinkage() and
 * return that function's integer result as a Fixnum.
 */
static VALUE
rlink_linkage_current_sublinkage_eq( VALUE self, VALUE index )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int sub_index = NUM2INT( index );
	int status = linkage_set_current_sublinkage( (Linkage)wrapper->linkage, sub_index );

	return INT2FIX( status );
}
Return a String containing a diagram of the linkage.
/*
 * Return the output of linkage_print_diagram() as a Ruby String. The native
 * buffer is released with linkage_free_diagram() once it has been copied.
 */
static VALUE
rlink_linkage_diagram( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	char *text = linkage_print_diagram( (Linkage)wrapper->linkage );
	VALUE result = rb_str_new2( text );

	linkage_free_diagram( text );

	return result;
}
Returns the connector or disjunct cost of the linkage.
/*
 * Return the value of linkage_disjunct_cost() for the wrapped linkage as a
 * Fixnum.
 */
static VALUE
rlink_linkage_disjunct_cost( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	Linkage lkg = (Linkage)wrapper->linkage;

	return INT2FIX( linkage_disjunct_cost(lkg) );
}
#disjunct_strings -> array
Return an Array of Strings showing the disjuncts that were actually used in association with each corresponding word in the current linkage. Each string shows the disjuncts in proper order; that is, left-to-right, in the order in which they link to other words. The returned strings can be thought of as a very precise part-of-speech-like label for each word, indicating how it was used in the given sentence; this can be useful for corpus statistics.
For a parsed version of the disjunct strings, call #disjuncts instead.
/*
 * Return an Array with one entry per word in the linkage: the word's
 * disjunct string, or nil where the library returns NULL for that word.
 *
 * The accessor used depends on whether HAVE_LINKAGE_GET_DISJUNCT_STR is
 * defined at build time.
 */
static VALUE
rlink_linkage_get_disjunct_strings( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	Linkage lkg = (Linkage)wrapper->linkage;
	int num_words = linkage_get_num_words( lkg );
	VALUE result = rb_ary_new2( num_words );
	const char *text;
	int word;

	for ( word = 0; word < num_words; word++ ) {
#ifdef HAVE_LINKAGE_GET_DISJUNCT_STR
		text = linkage_get_disjunct_str( lkg, word );
#else
		text = linkage_get_disjunct( lkg, word );
#endif

		/* Keep positions aligned with word indexes by storing nil for
		   words that have no disjunct string. */
		rb_ary_store( result, word, text ? rb_str_new2(text) : Qnil );
	}

	return result;
}
Return an Array of parsed (well, just split on whitespace for now) disjunct strings for the linkage.
# File lib/linkparser/linkage.rb, line 205
### Return the disjunct strings of the linkage, each split on whitespace
### into an Array. Entries that are nil in #disjunct_strings stay nil.
def disjuncts
	self.disjunct_strings.map {|dstr| dstr.nil? ? nil : dstr.split }
end
Returns true if the linkage has more than one sublinkage
(i.e., the sentence has a conjunction).
# File lib/linkparser/linkage.rb, line 37
### Returns +true+ when the linkage reports more than one sublinkage.
def has_conjunction?
	sublinkage_count = self.num_sublinkages
	sublinkage_count > 1
end
Returns true if the linkage has inconsistent domains. –
:fixme: Find out what it means that a linkage has inconsistent domains.
/*
 * Return true if linkage_has_inconsistent_domains() reports a non-zero value
 * for the wrapped linkage, false otherwise.
 */
static VALUE
rlink_linkage_has_inconsistent_domains_p( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );

	if ( linkage_has_inconsistent_domains((Linkage)wrapper->linkage) ) {
		return Qtrue;
	}

	return Qfalse;
}
Returns true if the linkage indicates the sentence is phrased
in the imperative voice.
# File lib/linkparser/linkage.rb, line 258
### Returns +true+ if any link is labelled 'Wi' and has a right word
### ending in '.v'; returns +false+ otherwise.
def imperative?
	self.links.any? do |link|
		link.label == 'Wi' && link.rword =~ /\.v$/
	end
end
Returns true if the linkage is “improper”. – :fixme: Find out
what an “improper fat linkage” is.
/*
 * Return true if linkage_is_improper() reports a non-zero value for the
 * wrapped linkage, false otherwise.
 */
static VALUE
rlink_linkage_improper_p( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );

	if ( linkage_is_improper((Linkage)wrapper->linkage) ) {
		return Qtrue;
	}

	return Qfalse;
}
Return a human-readable representation of the Linkage object.
# File lib/linkparser/linkage.rb, line 171
### Return a short description of the object made up of its class name,
### half of its object id rendered in hex, and its number of links.
def inspect
	details = [ self.class.name, self.object_id / 2, self.num_links ]
	return %Q{#<%s:0x%x: [%d links]>} % details
end
Return the +index+th link.
# File lib/linkparser/linkage.rb, line 181
### Build a Link struct for the link at +index+. The description is looked
### up in LINK_TYPES using only the uppercase letters of the link's label.
def link( index )
	words  = self.words
	left   = words[ self.link_lword(index) ]
	right  = words[ self.link_rword(index) ]
	length = self.link_length( index )
	label  = self.link_label( index )
	llabel = self.link_llabel( index )
	rlabel = self.link_rlabel( index )

	# Subscripts and other non-uppercase characters are not part of the key
	type_key = label.gsub( /[^A-Z]+/, '' ).to_sym

	Link.new( left, right, length, label, llabel, rlabel, LINK_TYPES[type_key] )
end
Returns the total (LEN) cost of the linkage, which is the total length of all links in the sentence minus the number of words – since the total link length is never less than the number of words.
/*
 * Return the value of linkage_link_cost() for the wrapped linkage as a
 * Fixnum.
 */
static VALUE
rlink_linkage_link_cost( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	Linkage lkg = (Linkage)wrapper->linkage;

	return INT2FIX( linkage_link_cost(lkg) );
}
Returns the names of the domains the index-th link belongs to.
/*
 * Return an Array of the domain names of the link at +index+.
 *
 * An empty Array is returned when linkage_get_link_num_domains() reports a
 * negative count for the link.
 */
static VALUE
rlink_linkage_get_link_domain_names( VALUE self, VALUE index )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int link_index = NUM2INT( index );
	Linkage lkg = (Linkage)wrapper->linkage;
	const char **domain_names = linkage_get_link_domain_names( lkg, link_index );
	int num_domains = linkage_get_link_num_domains( lkg, link_index );
	VALUE result;
	int d;

	if ( num_domains < 0 ) {
		return rb_ary_new();
	}

	result = rb_ary_new2( num_domains );
	for ( d = 0; d < num_domains; d++ ) {
		rb_ary_store( result, d, rb_str_new2(domain_names[d]) );
	}

	return result;
}
#link_label( index ) -> str -- The "intersection" of the left and right connectors that comprise the link.
/*
 * Return the label of the link at +index+ as a String, or nil if
 * linkage_get_link_label() returns NULL.
 */
static VALUE
rlink_linkage_get_link_label( VALUE self, VALUE index )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int link_index = NUM2INT( index );
	const char *text = linkage_get_link_label( (Linkage)wrapper->linkage, link_index );

	return text ? rb_str_new2( text ) : Qnil;
}
#link_length( index ) -- The number of words spanned by the index-th link of the current sublinkage.
/*
 * Return the result of linkage_get_link_length() for the link at +index+
 * as a Fixnum.
 */
static VALUE
rlink_linkage_get_link_length( VALUE self, VALUE index )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int link_index = NUM2INT( index );
	int length = linkage_get_link_length( (Linkage)wrapper->linkage, link_index );

	return INT2FIX( length );
}
#link_llabel -> str -- The label on the left word of the index-th link of the current sublinkage.
/*
 * Return the left-hand label of the link at +index+ as a String, or nil if
 * linkage_get_link_llabel() returns NULL.
 */
static VALUE
rlink_linkage_get_link_llabel( VALUE self, VALUE index )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int link_index = NUM2INT( index );
	const char *text = linkage_get_link_llabel( (Linkage)wrapper->linkage, link_index );

	return text ? rb_str_new2( text ) : Qnil;
}
#link_lword( index ) -- The number of the word on the left end of the index-th link of the current sublinkage.
/*
 * Return the result of linkage_get_link_lword() for the link at +index+ as
 * a Fixnum.
 */
static VALUE
rlink_linkage_get_link_lword( VALUE self, VALUE index )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int link_index = NUM2INT( index );
	int word_number = linkage_get_link_lword( (Linkage)wrapper->linkage, link_index );

	return INT2FIX( word_number );
}
Returns the number of domains in the index-th link.
/*
 * Return the result of linkage_get_link_num_domains() for the link at
 * +index+ as a Fixnum.
 */
static VALUE
rlink_linkage_get_link_num_domains( VALUE self, VALUE index )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int link_index = NUM2INT( index );

	return INT2FIX( linkage_get_link_num_domains((Linkage)wrapper->linkage, link_index) );
}
#link_rlabel -> str -- The label on the right word of the index-th link of the current sublinkage.
/*
 * Return the right-hand label of the link at +index+ as a String, or nil if
 * linkage_get_link_rlabel() returns NULL.
 */
static VALUE
rlink_linkage_get_link_rlabel( VALUE self, VALUE index )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int link_index = NUM2INT( index );
	const char *text = linkage_get_link_rlabel( (Linkage)wrapper->linkage, link_index );

	return text ? rb_str_new2( text ) : Qnil;
}
#link_rword( index ) -- The number of the word on the right end of the index-th link of the current sublinkage.
/*
 * Return the result of linkage_get_link_rword() for the link at +index+ as
 * a Fixnum.
 */
static VALUE
rlink_linkage_get_link_rword( VALUE self, VALUE index )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int link_index = NUM2INT( index );
	int word_number = linkage_get_link_rword( (Linkage)wrapper->linkage, link_index );

	return INT2FIX( word_number );
}
Return an Array of Link structs, one for each link in the linkage.
# File lib/linkparser/linkage.rb, line 196
### Return an Array containing the result of #link for every index from 0
### up to (but not including) #link_count.
def links
	link_indexes = ( 0...self.link_count )
	return link_indexes.map {|i| self.link(i) }
end
Return a String containing a list of all of the links and domain names for the current sublinkage.
Example:
sent = dict.parse("I eat, therefore I think")
puts sent.linkages.first.links_and_domains
prints:
///// RW <---RW----> RW ///// (m) ///// Wd <---Wd----> Wd I.p (m) I.p CC <---CC----> CC therefore (m) I.p Sp*i <---Sp*i--> Sp eat (m) , Xd <---Xd----> Xd therefore (m) (m) therefore Wd <---Wd----> Wd I.p (m) (m) I.p Sp*i <---Sp*i--> Sp think.v
/*
 * Return the output of linkage_print_links_and_domains() as a Ruby String.
 * The native buffer is released with linkage_free_links_and_domains() once
 * it has been copied.
 */
static VALUE
rlink_linkage_links_and_domains( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	char *text = linkage_print_links_and_domains( (Linkage)wrapper->linkage );
	VALUE result = rb_str_new2( text );

	linkage_free_links_and_domains( text );

	return result;
}
Return an Array of all the nouns in the linkage.
# File lib/linkparser/linkage.rb, line 245
### Return the unique words of the linkage's links that carry a '.n' suffix
### (optionally followed by a '-x' style marker), with the suffix removed.
### For each link the left word is examined before the right one.
def nouns
	found = []

	self.links.each do |link|
		[ link.lword, link.rword ].each do |word|
			found << $1 if word =~ /^(.*)\.n(?:-\w)?$/
		end
	end

	return found.uniq
end
#num_links -- The number of links used in the current sublinkage.
/*
 * Return the result of linkage_get_num_links() for the wrapped linkage as a
 * Fixnum.
 */
static VALUE
rlink_linkage_get_num_links( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int num_links = linkage_get_num_links( (Linkage)wrapper->linkage );

	return INT2FIX( num_links );
}
Return the number of sublinkages for a linkage with conjunctions, 1 otherwise.
/*
 * Return the result of linkage_get_num_sublinkages() for the wrapped linkage
 * as a Fixnum.
 */
static VALUE
rlink_linkage_num_sublinkages( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int num_sublinkages = linkage_get_num_sublinkages( (Linkage)wrapper->linkage );

	return INT2FIX( num_sublinkages );
}
#num_words -- The number of words in the sentence for which this is a linkage. Note that this function does not return the number of words used in the current sublinkage.
/*
 * Return the result of linkage_get_num_words() for the wrapped linkage as a
 * Fixnum.
 */
static VALUE
rlink_linkage_get_num_words( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int num_words = linkage_get_num_words( (Linkage)wrapper->linkage );

	return INT2FIX( num_words );
}
Return the object from the linkage.
# File lib/linkparser/linkage.rb, line 238
### Return the object from the linkage: the right word of the first link
### whose right label starts with 'O', with any '.n'/'.p' suffix removed.
### Returns +nil+ if there is no such link.
def object
	# ?O compares equal to the first element of the label whether String#[]
	# yields a character code or a one-character String. Links without a
	# right label are skipped.
	objlink = self.links.find {|link| link.rlabel && link.rlabel[0] == ?O } or return nil
	return objlink.rword.sub( /\.[np](?:-\w)?$/, '' )
end
Returns the macros needed to print out the linkage in a postscript file. By default, the output is just the set of postscript macros that describe the diagram. With full_doc=true a complete encapsulated postscript document is returned.
/*
 * Return the output of linkage_print_postscript() as a Ruby String.
 *
 * The truthiness of +full_doc+ is passed to the library as 1 or 0. The
 * native buffer is released with linkage_free_postscript() once it has been
 * copied.
 */
static VALUE
rlink_linkage_print_postscript( VALUE self, VALUE full_doc )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	int want_full_doc = 0;
	char *text;
	VALUE result;

	if ( RTEST(full_doc) ) {
		want_full_doc = 1;
	}

	text = linkage_print_postscript( (Linkage)wrapper->linkage, want_full_doc );
	result = rb_str_new2( text );
	linkage_free_postscript( text );

	return result;
}
Return the subject from the linkage.
# File lib/linkparser/linkage.rb, line 231
### Return the subject from the linkage: the left word of the first link
### whose left label starts with 'S', with any '.n'/'.p' suffix removed.
### Returns +nil+ if there is no such link.
def subject
	# ?S compares equal to the first element of the label whether String#[]
	# yields a character code or a one-character String. Links without a
	# left label are skipped.
	subjlink = self.links.find {|link| link.llabel && link.llabel[0] == ?S } or return nil
	return subjlink.lword.sub( /\.[np](?:-\w)?$/, '' )
end
Returns the unused word cost of the linkage, which corresponds to the number of null links that were required to parse it.
/*
 * Return the value of linkage_unused_word_cost() for the wrapped linkage as
 * a Fixnum.
 */
static VALUE
rlink_linkage_unused_word_cost( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	Linkage lkg = (Linkage)wrapper->linkage;

	return INT2FIX( linkage_unused_word_cost(lkg) );
}
Return the verb word from the linkage.
# File lib/linkparser/linkage.rb, line 217
### Return the verb word from the linkage with any '.v'/'.v-d' suffix
### removed, or +nil+ if none can be found. Links are first searched for a
### matching left label; only if none matches are they searched again for a
### matching right label.
def verb
	left_pattern  = /^(O([DFNTX]?)|P|BI|K|LI|MV|Q)[a-z\*]*/
	right_pattern = /^(SI|S|AF)[a-z\*]*/

	verblink = self.links.find {|link| link.llabel =~ left_pattern }
	return verblink.lword.sub( /\.v(-d)?$/, '' ) if verblink

	verblink = self.links.find {|link| link.rlabel =~ right_pattern }
	return verblink.rword.sub( /\.v(-d)?$/, '' ) if verblink

	return nil
end
If the linkage violated any post-processing rules, this method returns the name of the violated rule in the post-process knowledge file.
/*
 * Return the string from linkage_get_violation_name() as a Ruby String, or
 * nil if the library returns NULL.
 */
static VALUE
rlink_linkage_get_violation_name( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	const char *rule_name = linkage_get_violation_name( (Linkage)wrapper->linkage );

	if ( !rule_name ) {
		return Qnil;
	}

	return rb_str_new2( rule_name );
}
Return the Array of word spellings or individual word spelling for the current sublinkage. These are the “inflected” spellings, such as “dog.n”. The original spellings can be obtained by calls to LinkParser::Sentence#words.
/*
 * Return an Array of Strings built from the word list returned by
 * linkage_get_words(), with one entry for each of the
 * linkage_get_num_words() words.
 */
static VALUE
rlink_linkage_get_words( VALUE self )
{
	struct rlink_linkage *wrapper = get_linkage( self );
	Linkage lkg = (Linkage)wrapper->linkage;
	int num_words = linkage_get_num_words( lkg );
	const char **spellings = linkage_get_words( lkg );
	VALUE result = rb_ary_new2( num_words );
	int w = 0;

	while ( w < num_words ) {
		rb_ary_store( result, w, rb_str_new2(spellings[w]) );
		w++;
	}

	return result;
}