User:AnomieBOT/source/d/Templates.pm

From Wikipedia, the free encyclopedia
< User:AnomieBOT‎ | source‎ | d
Jump to: navigation, search
package d::Templates;

use utf8;
use strict;
use Data::Dumper;
use AnomieBOT::API;
AnomieBOT::API::load('d::Nowiki');

use vars qw/@ISA/;
@ISA=qw/d::Nowiki/;

=pod

=head1 NAME

d::Templates - AnomieBOT template-handling decorator

=head1 SYNOPSIS

 use AnomieBOT::API;

 $api = new AnomieBOT::API('conf.ini', 1);
 $api->decorators(qw/d::Templates/);

=head1 DESCRIPTION

C<d::Templates> contains template manipulating functions for use by an
AnomieBOT task. When "d::Templates" is used as a decorator on the API object,
the following methods are available.

In addition, all A<d::Nowiki> methods are also available, as this decorator
uses them internally.

=head1 METHODS PROVIDED

=over

=item $api->process_templates( $wikitext, $callback, $data )

Runs a parser over the wikitext, calling the callback function for each
template, magic word, or parser function found (basically anything encosed in
double-braces). The callback may return a replacement string, and the final
processed version is returned.

The callback function will be passed the following parameters:

=over

=item $name

The template name or the parser function/magic word invocation. For example,
"reflist" or "#tag:ref". Stripped of leading/trailing spaces and with the first
character uppercased.

=item $params

An array of the parameters. Spaces are not stripped, nor is there any attempt
to interpret named parameters.

=item $wikitext

The raw wikitext of the template.

=item $data

The data object passed in the original call.

=item $orig_name

C<$name> before the stripping and uppercasing.

=back

Any non-C<undef> return value will be used to replace the original template.

=cut

sub process_templates {
    my $api=shift;
    my ($text,$nowiki)=$api->strip_nowiki(shift);
    my $cb=shift;
    my $data=shift;

    my $notags=undef;
    ($text,$notags)=$api->strip_tags([$api->extension_tags], $text);
    if(exists($notags->{$text})){
        # The entire text was in one tag (probably because of a recursive call),
        # so process the contents of that one tag.
        $text=$notags->{$text};
        $notags=undef;
    } else {
        while(my ($k,$v)=each %$notags){
            next unless $text=~/\Q$k\E/;
            $v=$api->replace_stripped($v,$nowiki);
            $notags->{$k}=process_templates($api,$v,$cb,$data);
        }
    }

    my @stack=();
    while($text=~/(\{\{+|\}\}+|\[\[+|\]\]+|\|)/g){
        my $ct=length($1);
        my $i=pos($text)-$ct;
        my $x=@stack?$stack[$#stack]:undef;
        my $c=substr($1,0,1);

        if($c eq "\x7b"){
            # Found at least two open-braces
            push @stack, {
                char=>"\x7b",
                start=>$i,
                count=>$ct,
                pstart=>$i+$ct,
                params=>[]
            };
            $i+=$ct;
        } elsif($c eq "\x5b"){
            # Found at least two open-brackets
            push @stack, {
                char=>"\x5b",
                start=>$i,
                count=>$ct,
                pstart=>$i+$ct,
                params=>[]
            };
            $i+=$ct;
        } elsif($c eq "\x7d" && defined($x) && $x->{'char'} eq "\x7b"){
            # Found at least two close-braces, and we have at least one
            # possible template/variable on the stack.
            $ct=$x->{'count'} if $ct>$x->{'count'};
            $i+=$ct;

            push @{$x->{'params'}}, substr($text, $x->{'pstart'}, $i-$ct-$x->{'pstart'});
            # First, pull out variables
            if($ct>=3){
                $x->{'count'}-=$ct-($ct%3);
                $ct=$ct%3;
                my $s=$x->{'start'}+$x->{'count'};
                $x->{'params'}=[substr($text, $s, $i-$ct-$s)];
            }

            # Ok, any left is a template
            if($ct>=2){
                $x->{'count'}-=2;
                $ct-=2;
                my $s=$x->{'start'}+$x->{'count'};
                my $orig=$api->replace_stripped(substr($text,$s,$i-$ct-$s), $notags, $nowiki);
                map { $_=$api->replace_stripped($_, $notags, $nowiki); } @{$x->{'params'}};
                my $name=shift @{$x->{'params'}};
                my $oname=$name;
                $name=~s/<!--.*?-->//g;
                $name=~s/[\x{200e}\x{200f}\x{202a}-\x{202e}]//g; # MediaWiki strips these from titles
                $name=~s/[\s_\xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{2028}\x{2029}\x{202f}\x{205f}\x{3000}]+/ /g; # Mediawiki considers all these as whitespace
                $name=~s/^\s+|\s+$//g;
                $name=~s/^Template\s*:\s*//ig;
                $name=ucfirst($name);
                my $ret=&$cb($name, $x->{'params'}, $orig, $data, $oname);
                if(defined($ret)){
                    $ret="$ret";
                    # If we're completely removing the template and the
                    # template is the only thing on its line, remove the line
                    # too instead of leaving an empty one.
                    my $d=($ret eq '' && ($s==0 || substr($text,$s-1,1) eq "\n") && substr($text,$i-$ct,1) eq "\n")?1:0;
                    substr($text, $s, $i-$ct-$s+$d)=$ret;
                    $i=$s+length($ret)+$ct;
                    $x->{'params'}=[$ret];
                } else {
                    $x->{'params'}=[$orig];
                }
            }
            if($x->{'count'}<2){
                pop @stack;
            } else {
                # The one we just completed might not be the end of the param,
                # so reset the param array and pstart.
                $x->{'params'}=[];
                $x->{'pstart'}=$x->{'start'}+$x->{'count'};
            }
        } elsif($c eq "\x5d" && defined($x) && $x->{'char'} eq "\x5b"){
            # Found at least two close-brackets, and we have at least one
            # possible wikilink on the stack

            # Eat however many brackets are matched
            $ct=$x->{'count'} if $ct>$x->{'count'};
            $i+=$ct;
            $x->{'count'}-=$ct;
            if($x->{'count'}<2){
                pop @stack;
            } else {
                # The one we just completed might not be the end of the param,
                # so reset the param array and pstart.
                $x->{'params'}=[];
                $x->{'pstart'}=$x->{'start'}+$x->{'count'};
            }
        } elsif($c eq '|' && defined($x)){
            push @{$x->{'params'}}, substr($text, $x->{'pstart'}, $i-$x->{'pstart'});
            $x->{'pstart'}=++$i;
        } else {
            $i++;
        }
        pos($text)=$i;
    }

    return $api->replace_stripped($text, $notags, $nowiki);
}

=pod

=item $api->process_paramlist( @params )

Parse named parameters. Returns an array of objects having C<name>, C<oname>,
C<value>, and C<text> parameters. If the parameter was unnamed, C<oname> will
be undef and C<name> will be the calculated parameter number.

=cut

sub process_paramlist {
    my $api=shift;
    my @params=@_;
    my @ret=();
    my $idx=0;

    foreach (@params){
        # Normal unnamed params are easy to detect.
        if(!/=/){ push @ret, { oname=>undef, name=>++$idx, value=>$_, text=>$_ }; next; }

        # As long as the naive "name" part doesn't contain the start of a
        # template or a tag or an internal link, it's correct.
        if(/^(\s*([^=<\x5b\x7b]*?)\s*)=\s*(.*?)\s*$/s){ push @ret, { oname=>$1, name=>$2, value=>$3, text=>$_ }; next; }

        # Must be complicated now, the name can contain an equals if it's
        # inside a template, a parameter, an internal link, or a comment. Also,
        # replaced tags just completely screw things up (the parameter name
        # ends up containing the unique token, which is probably impossible to
        # actually _use_ as a parameter).
        my ($text,$nowiki)=$api->strip_tags([$api->extension_tags], $_);

        # Just comments/nowikis/refs fixed it?
        if($text!~/=/){ push @ret, { oname=>undef, name=>++$idx, value=>$_, text=>$_ }; next; }
        if($text=~/^\s*([^=\x5b\x7b]*?)\s*=\s*(.*?)\s*$/s){
            my ($oname,$name,$v)=($1,$1,$2);
            $oname=$api->replace_stripped($oname, $nowiki);
            $name=$api->replace_stripped($name, $nowiki);
            $v=$api->replace_stripped($v, $nowiki);
            push @ret, { oname=>$oname, name=>$name, value=>$v, text=>$_ };
            next;
        }

        # No, there must be a template or link in there somewhere...
        my @stack=();
        my $i=0;
        my $len=length($text);
        while($i<$len){
            my $x=@stack?$stack[$#stack]:undef;
            my $xb=undef;
            map { $xb=$_ if $_->{'char'} eq "\x5b" } @stack;

            if(substr($text,$i,2) eq "\x7b\x7b" || substr($text,$i,2) eq "\x5b\x5b"){
                # Found at least two open-braces/brackets
                my $ct;
                my $c=substr($text,$i,1);
                for($ct=2; substr($text,$i+$ct,1) eq $c; $ct++){}
                push @stack, {
                    char=>$c,
                    start=>$i,
                    count=>$ct,
                };
                $i+=$ct;
            } elsif(defined($x) && $x->{'char'} eq "\x7b" && substr($text,$i,2) eq "\x7d\x7d"){
                # Found at least two close-braces, and we have at least one
                # possible template/variable on the stack.
                my $ct;
                for($ct=2; substr($text,$i+$ct,1) eq "\x7d"; $ct++){}
                $ct=$x->{'count'} if $ct>$x->{'count'};
                $i+=$ct;

                while($ct>=3){
                    $x->{'count'}-=3;
                    $ct-=3;
                }
                while($ct>=2){
                    $x->{'count'}-=2;
                    $ct-=2;
                }
                if($x->{'count'}<2){
                    pop @stack;
                }
            } elsif(defined($xb) && substr($text,$i,2) eq "\x5d\x5d"){
                # Found at least two close-brackets, and we have at least one
                # possible wikilink on the stack

                # Drop any pending templates, they're not really templates
                while($stack[$#stack] ne $xb){
                    pop @stack;
                }

                # Eat however many brackets are matched
                my $ct;
                for($ct=2; substr($text,$i+$ct,1) eq "\x5d"; $ct++){}
                $ct=$xb->{'count'} if $ct>$xb->{'count'};
                $i+=$ct;
                $xb->{'count'}-=$ct;
                if($xb->{'count'}<2){
                    pop @stack;
                }
            } elsif(!defined($x) && substr($text,$i,1) eq '='){
                # Found the equals!
                last;
            } else {
                $i++;
            }
        }
        if($i>=$len){ push @ret, { oname=>undef, name=>++$idx, value=>$_, text=>$_ }; next; }
        my $oname=substr($text,0,$i);
        $oname=$api->replace_stripped($oname, $nowiki);
        my $name=$oname; $name=~s/^\s+|\s+$//g;
        my $v=substr($text,$i+1);
        $v=~s/^\s+|\s+$//g;
        $v=$api->replace_stripped($v, $nowiki);
        push @ret, { oname=>$oname, name=>$name, value=>$v, text=>$_ };
        next;
    }

    return @ret;
}

=pod

=item $api->strip_templates( $wikitext, \&callback, $data )

=item $api->strip_templates( $wikitext, \&callback, $data, \%matches )

Runs a parser over the wikitext, calling the callback function for each
template, magic word, or parser function found (basically anything encosed in
double-braces). If the callback returns a true value, the template is replaced
by an opaque token.

The callback function will be passed the same parameters as for
C<process_templates>. The return value is the same as for C<strip_regex> from
the A<d:Nowiki> decorator.

=item $api->strip_templates( $wikitext, \@templates )

=item $api->strip_templates( $wikitext, \@templates, \%matches )

As above, with a callback function that just tests whether the C<$name> is in
(or matches a regex in) the provided array.

=cut

sub strip_templates {
    my $api=shift;
    my $text=shift;
    my $cb=shift;
    my $data;
    if(ref($cb) eq 'ARRAY'){
        $data=$cb;
        $cb=\&_strip_templates_in_list;
    } else {
        $data=shift;
    }
    my $mapping=shift // {};

    $text=$api->process_templates($text, sub {
        return undef unless &$cb(@_);
        my $x=$api->replace_stripped($_[2],$mapping);
        my $tag=$api->get_token_for($x);
        $mapping->{$tag}=$x;
        return $tag;
    }, $data);

    return wantarray ? ($text,$mapping) : $text;
}

sub _strip_templates_in_list {
    return grep(ref($_) ? $_[0]=~/$_/ : $_[0] eq $_, @{$_[3]});
}

1;

=pod

=back

=head1 COPYRIGHT

Copyright 20082013 Anomie

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.