package Clair::MEAD::Summary;
use strict;
use Carp;

=head1 NAME

Clair::MEAD::Summary - access to a MEAD summary

=head1 SYNOPSIS

    use Clair::MEAD::Wrapper;
    # ...
    my $summary = $wrapper->get_summary();

    # Returns a sentence text only version
    print $summary->to_string() . "\n";

    # Sets the order of the documents to be displayed. Defaults to 
    # alphabetical.
    my @dids = $summary->get_dids();
    my %order;
    my $i = 1;
    foreach my $did (reverse sort @dids) {
        $order{$did} = $i++;
    }
    $summary->set_doc_order(%order);

    my $size = $summary->size();

    # iterate over the sentences
    foreach my $i (1 .. $size) { 
        # maps text, did, sno, rsnt, par
        my %sent = $summary->get_sent($i);

        # longer version
        my $text = $summary->get_text($i);
        my $did = $summary->get_did($i);
        my $sno = $summary->get_sno($i);
        my $rsnt = $summary->get_rsnt($i);
        my $par = $summary->get_par($i);

        # maps feature names to scores
        my %feats = $summary->get_features($i);

    }


=head1 DESCRIPTION

Clair::MEAD::Summary is used to access a MEAD summary produced by
Clair::MEAD::Wrapper. It includes methods to access the sentences and their
features.
Clair::MEAD::Summary should only be obtained from other objects and never directly
instantiated.

The order of the sentences in the document is determined by two things. First,
sentences from the same document are always in the same relative position as
they were in the original document. Second, since there is no natural ordering
of documents, there is a method that can be used to set the preferred document
order. The default order is determined by Perl's sort function applied to the
document ids.


=head1 METHODS

=over 4

=item to_string()

Returns a string representation of the summary. The sentences are 
concatenated with a space between them.


=item get_dids()

Returns the IDs of documents that have sentences in the summary.


=item set_doc_order(%map)

Sets an order on the documents. %map should be a hashmap mapping document 
ids (available through get_dids) to numbers. If %map does not contain each
did as a key, then the map will be ignored and the default order will be used.


=item size()

Returns the number of sentences in the summary.


=item get_sent($i)

Returns a hashmap containing the text, sno, did, rsnt, par, and order of the
$i-th sentence (where 1 <= $i <= size()). The keys to the map are
"text", "sno", "did", "rsnt", "par", and "order". Returns undef if $i is
outside the given range.


=item get_text($i), get_sno($i), get_did($i), get_rsnt($i), get_par($i)

Shortcut methods. These are equivalent to the key,value pairs in the hashmap
from get_sent($i). Each returns undef if $i is outside the range.


=item get_features($i)

Returns a hashmap mapping feature names to feature values. Returns undef if
$i is out of range.

=back

=cut


# Constructor. Builds a summary object from a MEAD extract.
#
# Arguments:
#   $class    - class name to bless the new object into
#   $extract  - reference to a non-empty array of sentence hashes carrying
#               the keys TEXT, DID, SNO, RSNT, PAR and FEATURES
#               (FEATURES is presumably a hash reference of feature scores)
#   $centroid - optional hash reference; an empty hash is used when omitted
#
# Sentences with any missing or false field are skipped with a warning.
# Croaks when $extract is not a non-empty array reference, or when no
# well-formed sentence remains.
sub new {

    my ($class, $extract, $centroid) = @_;

    # Test that $extract is a reference before dereferencing it: under
    # strict, @$extract on an undefined value dies with an unrelated message
    # before this croak could ever be reached.
    croak "Extract not defined" unless (ref $extract and @$extract);

    $centroid = {} unless ($centroid);

    my $position = 0;    # 1-based position in the extract, for diagnostics
    my $order    = 1;    # 1-based rank among the accepted sentences
    my @sents;
    my %uniq_dids;
    foreach my $sent (@$extract) {

        $position++;

        my ($text, $did, $sno, $rsnt, $par, $feats) =
           ($sent->{TEXT}, $sent->{DID}, $sent->{SNO}, $sent->{RSNT},
            $sent->{PAR}, $sent->{FEATURES});

        unless ($text and $did and $sno and $rsnt and $par and $feats) {
            # Report the real position in the extract, not the running
            # count of accepted sentences.
            carp "Sentence $position malformed, skipping";
            next;
        }

        $uniq_dids{$did} = 1;
        push @sents, {
            text => $text, did => $did, sno => $sno,
            rsnt => $rsnt, par => $par, feats => $feats, order => $order
        };
        $order++;

    }

    croak "Empty summary" unless @sents;

    # Default document order: position of each document id in sorted order.
    my @dids = keys %uniq_dids;
    my %doc_order;
    my $count = 1;
    foreach my $did (sort @dids) {
        $doc_order{$did} = $count++;
    }

    my $self = {
        _sents     => \@sents,
        _dids      => \@dids,
        _doc_order => \%doc_order,
        _centroid  => $centroid,    # kept so get_centroid() has data to return
    };

    bless $self, $class;

    $self->_sort_sents();

    return $self;

}

# Returns the summary as one string: the text of every sentence, in the
# current sentence order, separated by single spaces.
sub to_string {
    my ($self) = @_;

    my @texts;
    foreach my $sent (@{ $self->{_sents} }) {
        push @texts, $sent->{text};
    }

    return join(" ", @texts);
}


# Returns the list of document ids stored in the object (the ids of the
# documents that contributed sentences to the summary).
sub get_dids {
    my ($self) = @_;
    my $dids_ref = $self->{_dids};
    return @$dids_ref;
}


# Sets the document order used when sorting sentences.
#
# Arguments:
#   %order - maps every document id (see get_dids) to a number; lower
#            numbers sort first
#
# Returns the accepted map on success. If any document id has no defined
# entry in %order, the current order is left untouched and nothing is
# returned (undef in scalar context, the empty list in list context).
sub set_doc_order {

    my ($self, %order) = @_;

    foreach my $did (@{ $self->{_dids} }) {
        # Check definedness rather than truth so that 0 is a usable rank.
        # The bare return avoids handing back a one-element (undef) list in
        # list context.
        return unless defined $order{$did};
    }

    $self->{_doc_order} = \%order;
    $self->_sort_sents();

    return %order;

}

# Returns how many sentences the summary holds.
sub size {
    my ($self) = @_;
    my $sents_ref = $self->{_sents};
    return scalar @$sents_ref;
}

# Returns the $i-th sentence (1-based) as a hash with the keys "text",
# "sno", "did", "rsnt", "par" and "order".
#
# When $i is out of range, returns undef in scalar context and the empty
# list in list context, so that `my %sent = $summary->get_sent($i)` yields an
# empty hash instead of a bogus ('' => undef) entry.
sub get_sent {

    my ($self, $i) = @_;

    return unless $self->_sent_in_range($i);

    # Index the stored array directly instead of copying every sentence.
    my $sentref = $self->{_sents}[$i - 1];
    my %sent = (
        text  => $sentref->{text},
        sno   => $sentref->{sno},
        did   => $sentref->{did},
        rsnt  => $sentref->{rsnt},
        par   => $sentref->{par},
        order => $sentref->{order}
    );
    return %sent;

}

# Shortcut: the "text" attribute of the $i-th sentence (undef if out of range).
sub get_text {
    my ($self, $i) = @_;
    return $self->_get_sent_attr($i, "text");
}

# Shortcut: the "sno" attribute of the $i-th sentence (undef if out of range).
sub get_sno {
    my ($self, $i) = @_;
    return $self->_get_sent_attr($i, "sno");
}

# Shortcut: the "did" attribute of the $i-th sentence (undef if out of range).
sub get_did {
    my ($self, $i) = @_;
    return $self->_get_sent_attr($i, "did");
}

# Shortcut: the "order" attribute of the $i-th sentence (undef if out of
# range).
sub get_order {
    my ($self, $i) = @_;
    return $self->_get_sent_attr($i, "order");
}

# Shortcut: the "par" attribute of the $i-th sentence (undef if out of range).
sub get_par {
    my ($self, $i) = @_;
    return $self->_get_sent_attr($i, "par");
}

# Shortcut: the "rsnt" attribute of the $i-th sentence (undef if out of
# range).
sub get_rsnt {
    my ($self, $i) = @_;
    return $self->_get_sent_attr($i, "rsnt");
}

# Returns the feature map (feature name => value) of the $i-th sentence
# (1-based).
#
# When $i is out of range, returns undef in scalar context and the empty
# list in list context, so that `my %feats = $summary->get_features($i)`
# yields an empty hash instead of a bogus ('' => undef) entry.
sub get_features {

    my ($self, $i) = @_;

    return unless $self->_sent_in_range($i);

    # Index the stored array directly instead of copying every sentence.
    return %{ $self->{_sents}[$i - 1]{feats} };

}

# Returns the centroid stored in the object as a hash. Returns an empty
# hash when no centroid is stored.
sub get_centroid {

    my $self = shift;

    # Fall back to an empty hash: dereferencing a missing _centroid entry
    # would otherwise be fatal under strict.
    my $centroid = $self->{_centroid} || {};
    return %$centroid;

}

# Internal helper behind the get_* shortcuts: looks up one attribute of the
# $i-th sentence through get_sent(). Returns undef when $i is out of range.
sub _get_sent_attr {
    my ($self, $i, $attr) = @_;

    # Guard clause: nothing to look up for an invalid index.
    return undef unless $self->_sent_in_range($i);

    my %sent = $self->get_sent($i);
    return $sent{$attr};
}

# True when $i is a valid 1-based sentence index, i.e. 0 < $i <= size().
sub _sent_in_range {
    my ($self, $idx) = @_;
    my $last = $self->size();
    return ( $idx <= $last && $idx > 0 );
}

# Re-sorts the stored sentences with _cmp() and replaces the stored array
# reference with the newly sorted list.
sub _sort_sents {
    my ($self) = @_;
    $self->{_sents} = [ sort { $self->_cmp($a, $b) } @{ $self->{_sents} } ];
}

# Sort comparator for two sentence hashes.
#
# Arguments:
#   $s1, $s2 - sentence hash references with "did" and "sno" entries
#
# Sentences are ordered first by the rank of their document in the current
# document order, then by sentence number within the same document.
# Returns a negative, zero or positive number, as sort expects.
sub _cmp {

    my ($self, $s1, $s2) = @_;

    # Read through the stored reference. Copying the whole document-order
    # hash here would repeat that copy for every comparison of a sort.
    my $rank_of = $self->{_doc_order};

    my $by_doc = $rank_of->{ $s1->{did} } <=> $rank_of->{ $s2->{did} };
    return $by_doc if $by_doc;

    return $s1->{sno} <=> $s2->{sno};

}


1;
