class LinkParser::Linkage

Additional high-level functionality for LinkParser::Linkage objects.

Synopsis

Authors

Version

$Id: linkage.rb,v 507ef20fc315 2011/01/11 19:06:53 ged $

License

Copyright © 2006-2011, The FaerieMUD Consortium All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Constants

CTree

end)

Descriptions of the linkage types, keyed by linkage symbol

Link struct (:lword, :rword, :length, :label, :llabel, :rlabel, :desc)

Public Class Methods

new( index, sentence, options={} ) → LinkParser::Linkage click to toggle source

Create a new LinkParser::Linkage object out of the linkage indicated by index (a non-negative Integer) from the specified sentence (a LinkParser::Sentence). The optional options hash can be used to override the parse options of the Sentence for the new linkage.

/*
 * Initializer for LinkParser::Linkage: new( index, sentence, options={} ).
 *
 * Accepts two required arguments (the linkage index and the sentence) and
 * one optional argument (an options hash). Builds the underlying linkage
 * with linkage_create() and stores it, together with the sentence VALUE,
 * in a newly allocated rlink_linkage struct attached to self.
 *
 * Raises rlink_eLpError when the index is negative or greater than the
 * highest valid linkage index of the sentence, and RuntimeError when
 * check_linkage() reports that self already holds a linkage. If
 * linkage_create() yields no linkage, rlink_raise_lp_error() is called.
 *
 * Always returns Qnil.
 */
static VALUE
rlink_linkage_init( int argc, VALUE *argv, VALUE self ) {
        int link_index, max_index;
        VALUE index, sentence, options, defopts;
        struct rlink_sentence *sent_ptr;
        Linkage linkage;
        Parse_Options opts;
        struct rlink_linkage *ptr;

        /* A linkage may only be initialized once. */
        if ( check_linkage(self) ) {
                rb_raise( rb_eRuntimeError,
                                  "Cannot re-initialize a linkage once it's been created." );
        }

        rb_scan_args( argc, argv, "21", &index, &sentence, &options );

        /*
         * The defaults are an empty hash; the sentence's own options are not
         * consulted here (an earlier version called sentence.options).
         */
        defopts = rb_hash_new();
        options = rlink_make_parse_options( defopts, options );
        opts = rlink_get_parseopts( options );

        sent_ptr = (struct rlink_sentence *)rlink_get_sentence( sentence );

        /* Reject indexes on either side of the valid range 0..max_index. */
        link_index = NUM2INT(index);
        max_index = sentence_num_valid_linkages((Sentence)sent_ptr->sentence) - 1;
        if ( link_index < 0 || link_index > max_index )
                rb_raise( rlink_eLpError, "Invalid linkage %d (max is %d)",
                        link_index, max_index );

        linkage = linkage_create( link_index, (Sentence)sent_ptr->sentence, opts );
        if ( !linkage ) rlink_raise_lp_error();

        DATA_PTR( self ) = ptr = rlink_linkage_alloc();

        ptr->linkage = linkage;
        ptr->sentence = sentence;

        return Qnil;
}

Public Instance Methods

and_cost → fixnum click to toggle source

Returns the AND cost of the linkage, which is the difference in length between and-list elements.

/*
 * and_cost -> fixnum
 *
 * Look up the linkage wrapped by self and return the value reported by
 * linkage_and_cost() for it, converted to a Fixnum.
 */
static VALUE
rlink_linkage_and_cost( VALUE self ) {
        struct rlink_linkage *wrapper = get_linkage( self );
        const int cost = linkage_and_cost( (Linkage)wrapper->linkage );

        return INT2FIX( cost );
}
canonical? → true or false click to toggle source

Returns true if the linkage is canonical. The canonical linkage is the one in which the minimal disjunct that ever occurs in a position is used in that position.

/*
 * canonical? -> true or false
 *
 * Return Qtrue when linkage_is_canonical() gives a non-zero result for the
 * linkage wrapped by self, Qfalse otherwise.
 */
static VALUE
rlink_linkage_canonical_p( VALUE self ) {
        struct rlink_linkage *wrapper = get_linkage( self );

        if ( linkage_is_canonical((Linkage)wrapper->linkage) )
                return Qtrue;

        return Qfalse;
}
compute_union → true or false click to toggle source

If the linkage has a conjunction, combine all of the links occurring in all sublinkages together – in effect creating a “master” linkage (which may have crossing links). The union is created as another sublinkage, thus increasing the number of sublinkages by one. If the linkage has no conjunctions, computing its union has no effect. This method returns true if computing the union caused another sublinkage to be created, and false otherwise.

/*
 * compute_union -> true or false
 *
 * Call linkage_compute_union() on the wrapped linkage and compare the
 * sublinkage count taken before the call with the count taken after it.
 * Returns Qtrue only when the count grew.
 */
static VALUE
rlink_linkage_compute_union( VALUE self ) {
        struct rlink_linkage *wrapper = get_linkage( self );
        const int count_before = linkage_get_num_sublinkages( (Linkage)wrapper->linkage );
        int count_after;

        linkage_compute_union( (Linkage)wrapper->linkage );
        count_after = linkage_get_num_sublinkages( (Linkage)wrapper->linkage );

        if ( count_after > count_before )
                return Qtrue;

        return Qfalse;
}
constituent_tree → hash click to toggle source

Return the Linkage’s constituent tree as an Array of hierarchical “CTree” structs.

sent = dict.parse( "He is a big dog." )
link = sent.linkages.first
ctree = link.constituent_tree
# => [#<struct Struct::LinkParserLinkageCTree label="S",
          children=[#<struct Struct::LinkParserLinkageCTree label="NP">, ...],
          start=0, end=5>]
/*
 * constituent_tree -> array
 *
 * Obtain the constituent tree of the wrapped linkage from
 * linkage_constituent_tree(), convert it with
 * rlink_linkage_make_cnode_array(), release the C tree, and return the
 * converted value.
 */
static VALUE
rlink_linkage_constituent_tree( VALUE self ) {
        struct rlink_linkage *wrapper = get_linkage( self );
        CNode *root = linkage_constituent_tree( (Linkage)wrapper->linkage );
        VALUE nodes = rlink_linkage_make_cnode_array( root );

        /* The C-side tree is released once the Ruby value has been built. */
        linkage_free_constituent_tree( root );

        return nodes;
}
constituent_tree_string( mode=1 ) → str click to toggle source

Return the constituent tree as a printable string.

Example:

sent = dict.parse( "He is a big dog." )
link = sent.linkages.first
link.constituent_tree_string

# ==> "(S (NP He)\n   (VP is\n       (NP a big dog))\n   .)\n"
/*
 * constituent_tree_string( mode=1 ) -> str
 *
 * Return the string produced by linkage_print_constituent_tree() for the
 * wrapped linkage as a Ruby String, or nil when that call yields a NULL
 * pointer. The optional mode argument defaults to 1 and must lie in the
 * range 1..3; any other value raises ArgumentError.
 */
static VALUE
rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self ) {
        struct rlink_linkage *wrapper = get_linkage( self );
        VALUE mode_arg = Qnil;
        VALUE tree_str;
        char *raw;
        int mode = 1;

        /* Only override the default when the caller actually passed a mode. */
        if ( rb_scan_args(argc, argv, "01", &mode_arg) == 1 )
                mode = NUM2INT( mode_arg );

        if ( !(mode >= 1 && mode <= 3) )
                rb_raise( rb_eArgError, "Illegal mode %d specified.", mode );

        raw = linkage_print_constituent_tree( (Linkage)wrapper->linkage, mode );
        if ( !raw )
                return Qnil;

        /* Copy into a Ruby String before handing the buffer back to the library. */
        tree_str = rb_str_new2( raw );
        linkage_free_constituent_tree_str( raw );

        return tree_str;
}
current_sublinkage → fixnum click to toggle source

Get the index of the current sublinkage.

/*
 * current_sublinkage -> fixnum
 *
 * Return the value of linkage_get_current_sublinkage() for the wrapped
 * linkage as a Fixnum.
 */
static VALUE
rlink_linkage_current_sublinkage( VALUE self ) {
        struct rlink_linkage *wrapper = get_linkage( self );
        const int current = linkage_get_current_sublinkage( (Linkage)wrapper->linkage );

        return INT2FIX( current );
}
current_sublinkage = index → true or false click to toggle source

After this call, all operations on the linkage will refer to the index-th sublinkage. In the case of a linkage without conjunctions, this has no effect.

/*
 * current_sublinkage = index
 *
 * Convert index to a C int, pass it to linkage_set_current_sublinkage()
 * for the wrapped linkage, and return that function's integer result as
 * a Fixnum.
 */
static VALUE
rlink_linkage_current_sublinkage_eq( VALUE self, VALUE index ) {
        struct rlink_linkage *wrapper = get_linkage( self );
        const int wanted = NUM2INT( index );
        const int status = linkage_set_current_sublinkage( (Linkage)wrapper->linkage, wanted );

        return INT2FIX( status );
}
diagram → str click to toggle source

Return a String containing a diagram of the linkage.

/*
 * diagram -> str
 *
 * Copy the C string returned by linkage_print_diagram() for the wrapped
 * linkage into a new Ruby String, free the C string with
 * linkage_free_diagram(), and return the Ruby String.
 */
static VALUE
rlink_linkage_diagram( VALUE self ) {
        struct rlink_linkage *wrapper = get_linkage( self );
        char *text = linkage_print_diagram( (Linkage)wrapper->linkage );
        VALUE result = rb_str_new2( text );

        linkage_free_diagram( text );

        return result;
}
disjunct_cost → fixnum click to toggle source

Returns the connector or disjunct cost of the linkage.

/*
 * disjunct_cost -> fixnum
 *
 * Return the value of linkage_disjunct_cost() for the wrapped linkage,
 * stored in a C int and converted to a Fixnum.
 */
static VALUE
rlink_linkage_disjunct_cost( VALUE self ) {
        struct rlink_linkage *wrapper = get_linkage( self );
        const int cost = linkage_disjunct_cost( (Linkage)wrapper->linkage );

        return INT2FIX( cost );
}
disjunct_strings() click to toggle source

#disjunct_strings -> array

Return an Array of Strings showing the disjuncts that were actually used in association with each corresponding word in the current linkage. Each string shows the disjuncts in proper order; that is, left-to-right, in the order in which they link to other words. The returned strings can be thought of as a very precise part-of-speech-like label for each word, indicating how it was used in the given sentence; this can be useful for corpus statistics.

For a parsed version of the disjunct strings, call #disjuncts instead.

/*
 * disjunct_strings -> array
 *
 * Build an Array with one slot per word of the wrapped linkage (as counted
 * by linkage_get_num_words()). Each slot holds the disjunct string for
 * that word index as a Ruby String, or nil when the library returns a NULL
 * pointer for it.
 */
static VALUE
rlink_linkage_get_disjunct_strings( VALUE self ) {
        struct rlink_linkage *wrapper = get_linkage( self );
        int nwords = linkage_get_num_words( (Linkage)wrapper->linkage );
        VALUE result = rb_ary_new2( nwords );
        int pos = 0;

        while ( pos < nwords ) {
                const char *dstr;

                /* The accessor's name depends on what the build detected. */
#ifdef HAVE_LINKAGE_GET_DISJUNCT_STR
                dstr = linkage_get_disjunct_str( (Linkage)wrapper->linkage, pos );
#else
                dstr = linkage_get_disjunct( (Linkage)wrapper->linkage, pos );
#endif

                if ( !dstr )
                        rb_ary_store( result, pos, Qnil );
                else
                        rb_ary_store( result, pos, rb_str_new2(dstr) );

                pos++;
        }

        return result;
}
disjuncts() click to toggle source

Return an Array of parsed (well, just split on whitespace for now) disjunct strings for the linkage.

# File lib/linkparser/linkage.rb, line 205
# Map each entry of #disjunct_strings to its whitespace-split form; nil
# entries are passed through as nil.
def disjuncts
        return self.disjunct_strings.map {|dstr| dstr.nil? ? nil : dstr.split }
end
has_conjunction?() click to toggle source

Returns true if the linkage has more than one sublinkage (i.e., the sentence has a conjunction).

# File lib/linkparser/linkage.rb, line 37
# True when #num_sublinkages reports more than one sublinkage.
def has_conjunction?
        sublinkage_count = self.num_sublinkages
        return sublinkage_count > 1
end
has_inconsistent_domains? → true or false click to toggle source

Returns true if the linkage has inconsistent domains. – :fixme: Find out what it means that a linkage has inconsistent domains.

/*
 * has_inconsistent_domains? -> true or false
 *
 * Return Qtrue when linkage_has_inconsistent_domains() gives a non-zero
 * result for the wrapped linkage, Qfalse otherwise.
 */
static VALUE
rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
        struct rlink_linkage *wrapper = get_linkage( self );

        if ( linkage_has_inconsistent_domains((Linkage)wrapper->linkage) )
                return Qtrue;

        return Qfalse;
}
imperative?() click to toggle source

Returns true if the linkage indicates the sentence is phrased in the imperative voice.

# File lib/linkparser/linkage.rb, line 258
# True when at least one link is labelled 'Wi' and its right word ends
# in '.v'; false otherwise.
def imperative?
        return self.links.any? {|link| link.label == 'Wi' && link.rword =~ /\.v$/ }
end
improper? → true or false click to toggle source

Returns true if the linkage is “improper”. – :fixme: Find out what an “improper fat linkage” is.

/*
 * improper? -> true or false
 *
 * Return Qtrue when linkage_is_improper() gives a non-zero result for the
 * wrapped linkage, Qfalse otherwise.
 */
static VALUE
rlink_linkage_improper_p( VALUE self ) {
        struct rlink_linkage *wrapper = get_linkage( self );

        if ( linkage_is_improper((Linkage)wrapper->linkage) )
                return Qtrue;

        return Qfalse;
}
inspect() click to toggle source

Return a human-readable representation of the Linkage object.

# File lib/linkparser/linkage.rb, line 171
# Build a short description of this object from its class name, half of
# its object_id rendered in hex, and its link count.
def inspect
        details = [ self.class.name, self.object_id / 2, self.num_links ]
        return %Q{#<%s:0x%x: [%d links]>} % details
end
nouns() click to toggle source

Return an Array of all the nouns in the linkage.

# File lib/linkparser/linkage.rb, line 245
# Collect the stem of every left and right word in the links that carries
# a '.n' suffix (optionally followed by '-' and one word character), in
# link order, with duplicates removed.
def nouns
        found = self.links.inject( [] ) do |stems, link|
                stems << $1 if link.lword =~ /^(.*)\.n(?:-\w)?$/
                stems << $1 if link.rword =~ /^(.*)\.n(?:-\w)?$/
                stems
        end

        return found.uniq
end
num_sublinkages → fixnum click to toggle source

Return the number of sublinkages for a linkage with conjunctions, 1 otherwise.

static VALUE
rlink_linkage_num_sublinkages( VALUE self ) {
        struct rlink_linkage *ptr = get_linkage( self );
        return INT2FIX( linkage_get_num_sublinkages((Linkage)ptr->linkage) );
}
num_words() click to toggle source

#num_words -- The number of words in the sentence for which this is a linkage. Note that this function does not return the number of words used in the current sublinkage.

static VALUE
rlink_linkage_get_num_words( VALUE self ) {
        struct rlink_linkage *ptr = get_linkage( self );
        return INT2FIX( linkage_get_num_words((Linkage)ptr->linkage) );
}
Also aliased as: word_count
object() click to toggle source

Return the object from the linkage.

# File lib/linkparser/linkage.rb, line 238
# Find the first link whose right label starts with OO and return its
# right word with any trailing '.n'/'.p' marker (and optional '-x'
# suffix) removed. Returns nil when no such link exists.
def object
        objlink = self.links.find {|link| link.rlabel[0] == OO }
        return nil unless objlink

        return objlink.rword.sub( /\.[np](?:-\w)?$/, '' )
end
postscript_diagram( full_doc=false ) → str click to toggle source

Returns the macros needed to print out the linkage in a postscript file. By default, the output is just the set of postscript macros that describe the diagram. With full_doc=true a complete encapsulated postscript document is returned.

static VALUE
rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
        struct rlink_linkage *ptr = get_linkage( self );
        char *diagram_cstr;
        VALUE diagram;

        diagram_cstr = linkage_print_postscript( (Linkage)ptr->linkage,
                RTEST(full_doc) ? 1 : 0 );
        diagram = rb_str_new2( diagram_cstr );
        linkage_free_postscript( diagram_cstr );

        return diagram;
}
subject() click to toggle source

Return the subject from the linkage.

# File lib/linkparser/linkage.rb, line 231
# Find the first link whose left label starts with SS and return its
# left word with any trailing '.n'/'.p' marker (and optional '-x'
# suffix) removed. Returns nil when no such link exists.
def subject
        subjlink = self.links.find {|link| link.llabel[0] == SS }
        return nil unless subjlink

        return subjlink.lword.sub( /\.[np](?:-\w)?$/, '' )
end
unused_word_cost → fixnum click to toggle source

Returns the unused word cost of the linkage, which corresponds to the number of null links that were required to parse it.

static VALUE
rlink_linkage_unused_word_cost( VALUE self ) {
        struct rlink_linkage *ptr = get_linkage( self );
        int rval;

        rval = linkage_unused_word_cost( (Linkage)ptr->linkage );

        return INT2FIX( rval );
}
verb() click to toggle source

Return the verb word from the linkage.

# File lib/linkparser/linkage.rb, line 217
# Pick the verb word from the links. The left word of the first link
# whose left label matches the object/predicate-style pattern is tried
# first; failing that, the right word of the first link whose right
# label matches the subject-style pattern. A trailing '.v' or '.v-d'
# marker is stripped from the result. Returns nil when neither search
# finds a link.
def verb
        verblink = self.links.find {|link| link.llabel =~ /^(O([DFNTX]?)|P|BI|K|LI|MV|Q)[a-z\*]*/ }
        return verblink.lword.sub( /\.v(-d)?$/, '' ) if verblink

        verblink = self.links.find {|link| link.rlabel =~ /^(SI|S|AF)[a-z\*]*/ }
        return verblink.rword.sub( /\.v(-d)?$/, '' ) if verblink

        return nil
end
violation_name → str click to toggle source

If the linkage violated any post-processing rules, this method returns the name of the violated rule in the post-process knowledge file.

static VALUE
rlink_linkage_get_violation_name( VALUE self ) {
        struct rlink_linkage *ptr = get_linkage( self );
        const char *violation_name = NULL;

        violation_name = linkage_get_violation_name( (Linkage)ptr->linkage );

        if ( violation_name ) {
                return rb_str_new2( violation_name );
        } else {
                return Qnil;
        }
}
word_count() click to toggle source
Alias for: num_words
words → array click to toggle source

Return the Array of word spellings or individual word spelling for the current sublinkage. These are the “inflected” spellings, such as “dog.n”. The original spellings can be obtained by calls to LinkParser::Sentence#words.

static VALUE
rlink_linkage_get_words( VALUE self ) {
        struct rlink_linkage *ptr = get_linkage( self );
        const char **words;
        int count, i;
        VALUE words_ary;

        count = linkage_get_num_words( (Linkage)ptr->linkage );
        words = linkage_get_words( (Linkage)ptr->linkage );
        words_ary = rb_ary_new2( count );

        for ( i = 0; i < count; i++ ) {
                rb_ary_store( words_ary, i, rb_str_new2(words[i]) );
        }

        return words_ary;
}