# User:AnomieBOT/source/tasks/WatchlistUpdater.pm
#
# From Wikipedia, the free encyclopedia
package tasks::WatchlistUpdater;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    WatchlistUpdater
BRFA:    N/A
Status:  Begun 2008-08-15
Created: 2008-08-16

Updates algorithmically-defined "watchlists" (like [[User:Anomie/uw-templates]])
when pages are created or deleted. The bot only edits when something actually
changes.

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

use Data::Dumper;

# ---------------------------------------------------------------------------
# Page configuration.
#
# Every entry of @cfg_pages describes one generated list page. run() reads
# the following keys from each entry:
#   page         title of the page to update
#   beginmarker  text after which the generated list starts ('' = page start)
#   endmarker    text before which the generated list ends ('' = page end)
#   frequency    minimum number of seconds between two updates
#   maxrows      row count above which the list is cut off
#   query        array of API parameter hashes, executed one after another
#   gcontinue    passed (wrapped in an array ref) as first argument of
#                $api->query -- NOTE(review): presumably names the
#                continuation to follow; confirm against the API class
#   result       key below {'query'} in the API reply that holds the rows
#   match        pattern handed to _match(); rows that fail are skipped
#   summary      third argument of $api->edit (presumably the edit summary)
#   botflag      fifth argument of $api->edit (presumably the bot-edit flag)
#   keyforpage   optional; maps a row to its sort/dedup key (default: title)
#   outprefix, outformat, outerror, outsuffix
#                callbacks producing the text before the rows, for one row,
#                for an error notice and after the rows. Each is called as
#                $code->($state, $arg), where $state is a scratch hash ref
#                and $arg is the row record (outformat) or the message text
#                (outerror).
# ---------------------------------------------------------------------------

# Settings that are identical for every configured page.
my %cfg_common=(
    beginmarker => "\n<!-- SNIP HERE -->\n",
    endmarker   => '',
    frequency   => 6*60*60,
    maxrows     => 10000,
    gcontinue   => 'allpages',
    result      => 'allpages',
    botflag     => 1,
);

# Build the parameters of one list=allpages query for all titles in
# namespace $ns that start with $prefix.
my $cfg_allpages=sub {
    my ($ns, $prefix)=@_;
    return {
        list        => 'allpages',
        apprefix    => $prefix,
        apnamespace => $ns,
        aplimit     => 'max',
    };
};

# Format one row of the two-column "page / talk page" wikitable.
my $cfg_table_row=sub {
    my (undef, $rec)=@_;
    my $title=$rec->{'title'};
    my $ns=$rec->{'ns'};

    # Namespaces 6 and 14 (File and Category on MediaWiki) need a leading
    # colon, otherwise the link would embed/categorize instead of linking.
    my $main=($ns==14 || $ns==6) ? ":$title" : $title;

    my $talk;
    if($ns==0){
        # Main-namespace titles carry no prefix, so one is prepended.
        $talk="Talk:$title";
    } else {
        # Turn "Prefix:Name" into "Prefix talk:Name".
        $talk=$title;
        substr($talk, index($talk, ':'), 0)=' talk';
    }
    return "|-\n|[[$main]]||[[$talk]]\n";
};

# Output callbacks shared by all pages rendered as a wikitable.
my %cfg_wikitable=(
    outprefix   => sub { "{| class=\"wikitable\"\n" },
    outformat   => $cfg_table_row,
    outerror    => sub { "|-\n|colspan=\"2\"|".$_[1]."\n" },
    outsuffix   => sub { "|}" },
);

my @cfg_pages=(
    {
        %cfg_common,
        %cfg_wikitable,
        page        => 'User:Anomie/uw-templates',
        query       => [ $cfg_allpages->('10', 'Uw-') ],
        match       => {},
        summary     => 'Automatically updating list of uw-* templates',
    },
    {
        %cfg_common,
        %cfg_wikitable,
        page        => 'User:AnomieBOT/index',
        query       => [
            map { $cfg_allpages->('2', $_) } (
                'AnomieBOT/',
                'AnomieBOT II/',
                'AnomieBOT III/',
                'MediationBot/',
                'MedcabBot/',
            )
        ],
        match       => {},
        summary     => 'Automatically updating userspace index',
    },
    {
        %cfg_common,
        %cfg_wikitable,
        page        => 'User:Anomie/index',
        query       => [ $cfg_allpages->('2', 'Anomie/') ],
        match       => {},
        summary     => 'Automatically updating userspace index',
    },
    {
        %cfg_common,
        page        => 'User:AnomieBOT/nobots tests',
        query       => [ $cfg_allpages->('2', 'AnomieBOT/nobots test ') ],
        match       => {},
        summary     => 'Automatically updating list of bot exclusion tests',
        outprefix   => sub { "{{div col}}\n" },
        # Sort key: titles ending in " <number>" are keyed by the number,
        # zero-padded to 8 digits, so that "test 10" sorts after "test 9".
        keyforpage  => sub {
                my $t=$_[0]{'title'};
                return $t unless $t=~/ (\d+)$/;
                return sprintf("%08d", $1)."|$t";
            },
        outformat   => sub {
                my $t=$_[1]{'title'};
                # Drop anything up to and including the first "|".
                # NOTE(review): run() passes the row record here, not the
                # sort key, so this looks like a no-op -- confirm.
                $t=~s/^[^|]*\|//;
                return "* [[$t]]\n";
            },
        outerror    => sub { "* <strong class=\"error\">".$_[1]."</strong>\n" },
        outsuffix   => sub { "\n{{div col end}}" },
    }
);

# Constructor.
#
# Creates the object through the parent class constructor and stores a new
# array of the @cfg_pages entries under 'pages'. The copy is shallow: the
# array is new, but the configuration hashes inside it are the very same
# ones held by @cfg_pages, so values run() writes into them (such as
# 'lastrun') are visible through every instance.
sub new {
    my ($class)=@_;

    my $obj=$class->SUPER::new();
    my @pages=@cfg_pages;
    $obj->{'pages'}=\@pages;

    return bless($obj, $class);
}

=pod

=for info
Per [[WP:BOT#Approval]], any bot or automated editing process that
affects only the operators' user and talk pages (or subpages thereof),
and which is not otherwise disruptive, may be run without prior
approval.

=cut

# Approval hook: unconditionally reports the constant 999.
# NOTE(review): the meaning of this value is defined by the code that calls
# this hook, which is not part of this file -- confirm there.
sub approved {
    my $approval=999;
    return $approval;
}

# Main task entry point: bring every configured list page that is due up to
# date.
#
# Arguments:
#   $self - task object; $self->{'pages'} holds the page configuration hashes
#   $api  - object used for all wiki access (query, edittoken, edit) and for
#           reporting (warn, log)
#
# For each page the method determines when the bot last edited it (cached in
# the configuration hash as 'lastrun'), skips it unless 'frequency' seconds
# have passed, runs the configured queries, renders the rows through the
# out* callbacks, splices the result between the begin/end markers and saves
# the page only if the text actually changed.
#
# Returns the number of seconds until the task needs to run again:
#   0    the 300-second time budget was used up before all pages were handled
#   60   an API read request failed, or a write failed and no other delay
#        of at least 60 seconds applies
#   300  the edit token request reported code 'shutoff'
#   else seconds until the earliest page becomes due (at most 864000)
sub run {
    my ($self, $api)=@_;

    $api->task('WatchlistUpdater', 0, 10, qw(d::Timestamp));

    my $endtime=time()+300;

    # Set when a save fails, so the final delay can include a back-off.
    my $write_failed=0;

    foreach my $data (@{$self->{'pages'}}){
        my $page=$data->{'page'};

        # We've run too long, wait on the rest until next time
        return 0 if time()>=$endtime;

        # Check last run time if we haven't already recorded it
        if(!exists($data->{'lastrun'})){
            my $res=$api->query(
                titles  => $page,
                prop    => 'revisions',
                rvuser  => $api->user,
                rvprop  => 'timestamp',
                rvlimit => 1  # Only need the last rev
            );
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to retrieve last edit date for $page: ".$res->{'error'}."\n");
                return 60;
            }
            $res=[values(%{$res->{'query'}{'pages'}})];
            if(exists($res->[0]{'revisions'}[0]{'timestamp'})){
                $data->{'lastrun'}=$api->ISO2timestamp($res->[0]{'revisions'}[0]{'timestamp'});
            } else {
                # No revision by this user was returned: treat as never run.
                $data->{'lastrun'}=0;
            }
        }

        # Time to check again?
        next unless time()>=$data->{'lastrun'}+$data->{'frequency'};

        # Get edit token
        my $tok=$api->edittoken($page);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to retrieve edit token for $page: ".$tok->{'error'});
            return 60;
        }
        if(exists($tok->{'missing'})){
            # Nothing to update; wait a full interval before looking again.
            $api->warn("Page $page does not exist");
            $data->{'lastrun'}=time();
            next;
        }
        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

        # Generate new table
        my %out=();     # sort key => row record
        my $rows=0;
        my %cont=();    # continuation parameters for the current query
        my @queries=@{$data->{'query'}};
        my $query=shift @queries;
        do {
            my $res=$api->query([$data->{'gcontinue'}], %$query, %cont);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to retrieve data for $page: ".$res->{'error'});
                return 60;
            }

            # Merge all continuation parameters of the reply; they are sent
            # along with the same query on the next iteration.
            %cont=();
            if(exists($res->{'query-continue'})){
                foreach my $n (values %{$res->{'query-continue'}}){
                    %cont=(%cont, %$n);
                }
            }

            # No continuation left: move on to the next configured query.
            # $query becomes undef after the last one, which ends the loop.
            $query=shift @queries unless(%cont);

            $res=$res->{'query'}{$data->{'result'}};
            my @r;
            if(ref($res) eq 'ARRAY'){
                @r=@$res;
            } elsif(ref($res) eq 'HASH'){
                @r=values %$res;
            } else {
                $api->warn("Invalid data for $page: Not an array or hash ref");
                return 60;
            }
            foreach my $row (@r){
                # Skip odd-numbered namespaces (talk namespaces on MediaWiki).
                next if ($row->{'ns'}&1)==1;
                next unless _match($data->{'match'}, $row);
                my $k=exists($data->{'keyforpage'})?$data->{'keyforpage'}($row):$row->{'title'};
                $out{$k}=$row;
                last if ++$rows>$data->{'maxrows'};
            }
        } while($rows<=$data->{'maxrows'} && $query);

        # Render the rows in key order; $x is a scratch hash handed to every
        # output callback.
        my $x={};
        my $table=$data->{'outprefix'}($x);
        foreach my $k (sort keys %out){
            $table.=$data->{'outformat'}($x,$out{$k});
        }
        $table.=$data->{'outerror'}($x,"<strong class=\"error\">List truncated at $rows rows</strong>") if $rows>$data->{'maxrows'};
        $table.=$data->{'outsuffix'}($x);

        # Perform edit, if needed
        my $outtxt=$intxt;
        my ($begin,$end);
        if($data->{'beginmarker'} eq ''){
            $begin=0;
        } else {
            # The generated text starts right after the begin marker.
            $begin=index($outtxt, $data->{'beginmarker'});
            $begin+=length($data->{'beginmarker'}) if $begin>=0;
        }
        if($data->{'endmarker'} eq ''){
            $end=length($outtxt);
        } else {
            $end=index($outtxt, $data->{'endmarker'}, $begin);
        }
        if($begin<0 || $end<0){
            $api->warn("Begin/end markers not found, refusing to edit $page\n");
        } else {
            substr($outtxt,$begin,$end-$begin)=$table;
            if($intxt eq $outtxt){
                $api->log("No update needed for $page");
            } else {
                my $res=$api->edit($tok, $outtxt, $data->{'summary'}, 0, $data->{'botflag'});
                if($res->{'code'} ne 'success'){
                    $api->warn("Write for $page failed: ".$res->{'error'});
                    # 'lastrun' stays untouched so the page is retried on the
                    # next run.
                    $write_failed=1;
                    next;
                }
                $api->log("Updated $page");
            }
        }

        # Record last update time
        $data->{'lastrun'}=time();
    }

    # We processed all pages, calculate the number of seconds until the next
    # time we're needed.
    my $t=864000; # arbitrary initial/max value
    foreach my $cfg (@{$self->{'pages'}}){
        next if $cfg->{'lastrun'}==0;
        my $tt=$cfg->{'lastrun'}+$cfg->{'frequency'}-time();
        $t=$tt if $tt<$t;
    }

    # A page whose write failed is still overdue, which makes the value
    # computed above zero or negative. Back off like the other failure
    # paths instead of asking for an immediate re-run.
    $t=60 if $write_failed && $t<60;

    return $t;
}

# Recursively test whether $value satisfies the pattern $match.
#
# The first applicable rule decides:
#   $match is a code ref   - it is called with $value; its truth is the result
#   $match is an array ref - alternatives: true if any element matches $value
#   $value is an array ref - true if any element of $value matches $match
#   $match is undef        - true only if $value is undef as well
#   $value is undef        - false
#   $match is a plain scalar - string equality with $value
#   $match is a qr// object  - regex match against $value
#   $match is a hash ref   - $value must be a hash ref too, and every key of
#                            $match must match the entry of the same name in
#                            $value (a missing entry counts as undef); an
#                            empty hash therefore matches any hash
#   anything else          - false
#
# Always returns exactly one value, 1 or 0, in scalar and in list context.
sub _match {
    my ($match, $value)=@_;
    my $kind=ref($match);

    return ($match->($value) ? 1 : 0) if $kind eq 'CODE';

    if($kind eq 'ARRAY'){
        foreach my $alternative (@$match){
            return 1 if _match($alternative, $value);
        }
        return 0;
    }
    if(ref($value) eq 'ARRAY'){
        foreach my $element (@$value){
            return 1 if _match($match, $element);
        }
        return 0;
    }

    return (defined($value) ? 0 : 1) if !defined($match);
    return 0 if !defined($value);
    return ($match eq $value ? 1 : 0) if $kind eq '';

    # The ternary forces scalar context: a bare match in list context would
    # return the capture groups on success and an empty list on failure.
    return ($value=~/$match/ ? 1 : 0) if $kind eq 'Regexp';

    if($kind eq 'HASH'){
        return 0 if ref($value) ne 'HASH';
        # keys() instead of each(): leaving the loop early cannot strand a
        # half-finished iterator on the pattern hash.
        foreach my $k (keys %$match){
            my $candidate=$value->{$k};
            return 0 unless _match($match->{$k}, $candidate);
        }
        return 1;
    }

    return 0;
}

1;