User:AnomieBOT/source/tasks/ITNCArchiver.pm

From Wikipedia, the free encyclopedia
package tasks::ITNCArchiver;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    ITNCArchiver
BRFA:    Wikipedia:Bots/Requests for approval/AnomieBOT 42
Status:  Approved 2010-09-10
Created: 2010-08-31

Daily at 0000 UTC:
* Add the section for the new day at [[WP:ITN/C]]
* Archive the oldest day from [[WP:ITN/C]] to the appropriate archive page
* Update [[Wikipedia:In the news/Candidates/Archives]] as new archive subpages are created.

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task qw/:time/;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

my @months=('','January','February','March','April','May','June','July','August','September','October','November','December');

sub new {
    my $class=shift;
    my $self=$class->SUPER::new();
    bless $self, $class;
    return $self;
}

=pod

=for info
Approved 2010-09-10<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 42]]

=cut

sub approved {
    return 2;
}

sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('ITNCArchiver', 0, 10, qw/d::Sections d::Timestamp d::Talk/);

    my $screwup=' Errors? [[User:'.$api->user.'/shutoff/ITNCArchiver]]';
    my $b0rken=0;
    my $re='^\s*('.join('|',@months[1..12]).')\s+(\d+)\s*$'; $re=qr/$re/;

    # Date to add
    my @today=gmtime;
    my $today=strftime("%B %-d", @today);

    # Load WP:ITN/C
    $api->log("Checking WP:ITN/C");
    my $tok=$api->edittoken('Wikipedia:In the news/Candidates');
    if($tok->{'code'} eq 'shutoff'){
        $api->warn("Task disabled: ".$tok->{'content'}."\n");
        return 300;
    }
    if($tok->{'code'} ne 'success'){
        $api->warn("Failed to get edit token for Wikipedia:In the news/Candidates: ".$tok->{'error'}."\n");
        return 60;
    }
    if(exists($tok->{'missing'})){
        $api->warn("Wikipedia:In the news/Candidates does not exist, WTF?");
        return 300;
    }

    # Split the page into sections, and figure out if we need to add a new one
    # or archive any old ones.
    my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
    my $added_new=0;
    my %archive=();
    my @sections=$api->split_sections($intxt, '2');
    my @outsections=();
    my $ct=0;
    foreach my $s (@sections){
        if(defined($s->{'level'}) && $s->{'title'}=~/$re/){
            $today=0 if "$1 $2" eq $today;
        }
    }
    foreach my $s (@sections){
        # Only do dated sections
        if(defined($s->{'level'}) && $s->{'title'}=~/$re/){
            my ($m,$d)=($1,$2);
            for(my $i=1; $i<@months; $i++){
                $m=$i if $m eq $months[$i];
            }
            # Assume it's last year if the month > the current month
            my $y=$today[5]+1900;
            $y-- if $m>$today[4]+1;
            $api->debug(1,"Processing section $s->{title} => $d $m $y\n");
            if($today){
                # Add in a new section
                my $dt=($today[5]+1900).' '.$today;
                $api->debug(1,"Adding new section for $dt\n");
                push @outsections, {
                    level => $s->{'level'},
                    title => $today,
                    titlespaced => $s->{'titlespaced'},
                    titlecomment => '',
                    body => "{{cot|[[Portal:Current events/$dt]]}}\n{{Portal:Current events/$dt}}\n{{cob}}\n----\n<!-- Insert new nominations below this line -->\n",
                };
                $ct++;
                $added_new=1;
                $today=0;
            }
            if(++$ct>8){
                # We keep only 8 dates sections, so archive any later
                $api->debug(1,"Archiving section $s->{title}\n");
                my $a=$months[$m].' '.$y;
                $archive{$a}=[] unless exists($archive{$a});
                push @{$archive{$a}}, $s;
                next;
            }
        }
        # Copy section to output
        $s->{'body'}=~s/\n<!-- Insert new nominations below this line -->\n/\n/g if $added_new;
        push @outsections, $s;
    }

    # Reconstruct the page, and save if necessary
    my $outtxt=$api->join_sections(@outsections);
    if($added_new || %archive){
        my $summary='';
        $summary.="Adding section for ".$months[$today[4]+1]." ".$today[3] if $added_new;
        if(%archive){
            my @a=map $_->{'title'}, map @$_, values %archive;
            $a[$#a]='and '.$a[$#a] if @a>1;
            $summary.=($summary?' and a':'A').'rchiving '.join(@a>2?', ':' ', @a);
        }
        $api->log("$summary in $tok->{title}");

        my $r=$api->edit($tok, $outtxt, "$summary. $screwup", 0, 1);
        if($r->{'code'} ne 'success'){
            $api->warn("Write failed on $tok->{title}: ".$r->{'error'}."\n");
            return 60;
        }
        if(%archive){
            # Save succeeded, so save the sections-to-archive to the
            # bot's store.
            my $a=$api->store->{'archive'} // {};
            for my $k (keys %archive){
                $a->{$k}=[@{$archive{$k}}, @{$a->{$k} // []}];
            }
            $api->store->{'archive'}=$a;
        }
    }

    # Pull sections-to-archive from the bot's store, and write them to the
    # appropriate archive pages.
    my $a=$api->store->{'archive'};
    for my $k (keys %$a){
        $tok=$api->edittoken("Wikipedia:In the news/Candidates/$k");
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for Wikipedia:In the news/Candidates/$k: ".$tok->{'error'}."\n");
            $b0rken=1;
            next;
        }

        my @a=map $_->{'title'}, @{$a->{$k}};
        $a[$#a]='and '.$a[$#a] if @a>1;
        my $summary='Archiving '.join(@a>2?', ':' ', @a);
        my $txt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // "<div style=\"padding: 5px; background: #9FE391; border: 1px solid #EC6633\">\n<center>\nThis page is an archive and its contents should be preserved in their current form;<br>\nany comments regarding this page should be directed to [[Wikipedia talk:In the news]]. Thanks.\n</center>\n</div>\n<!-- ADD NEW ARCHIVE HERE -->";
        my $v=$api->join_sections(@{$a->{$k}});
        $v=~s/^\s+|\s+$//g;
        unless($txt=~s/<!-- ADD NEW ARCHIVE HERE -->/<!-- ADD NEW ARCHIVE HERE -->\n$v/){
            $api->warn("Failed to find archive marker in ".$tok->{'title'}."\n");
            $api->whine("Cannot update [[".$tok->{'title'}."]]", "I could not find the <code><nowiki><!-- ADD NEW ARCHIVE HERE --></nowiki></code> marker in [[".$tok->{'title'}."]], so I don't know where to insert the archive. Please replace it. Thanks!");
            $b0rken=1;
            next;
        }

        $api->log("$summary to ".$tok->{'title'});
        my $r=$api->edit($tok, $txt, "$summary. $screwup", 0, 1);
        if($r->{'code'} ne 'success'){
            $api->warn("Write failed on $tok->{title}: ".$r->{'error'}."\n");
            $b0rken=1;
            next;
        }

        # Archival succeeded, so remove the sections-to-archive from the store
        delete $a->{$k};
        $api->store->{'archive'}=$a;
    }

    # Now, we need to update the archive listing
    {
        $tok=$api->edittoken("Wikipedia:In the news/Candidates/Archives");
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for Wikipedia:In the news/Candidates/Archives: ".$tok->{'error'}."\n");
            $b0rken=1;
            last;
        }

        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '';
        $intxt=~s/^\s+|\s+$//g;

        unless($intxt=~/<!-- Start archive links -->.*<!-- End archive links -->/s){
            $api->warn("Failed to find link markers in Wikipedia:In the news/Candidates/Archives\n");
            $api->whine("Cannot update [[Wikipedia:In the news/Candidates/Archives]]", "I could not find the <code><nowiki><!-- Start archive links --></nowiki></code> and <code><nowiki><!-- End archive links --></nowiki></code> markers in [[Wikipedia:In the news/Candidates/Archives]], so I don't know where to insert the links. Please replace them. Thanks!");
            $b0rken=1;
            last;
        }

        my %links=();
        my $re='^Wikipedia:In the news/Candidates/(('.join('|',@months[1..12]).')(?:–(?:'.join('|',@months[1..12]).'))?\s+(\d+))$'; $re=qr/$re/;
        my $i=$api->iterator(
            list        => 'allpages',
            apnamespace => 4,
            apprefix    => 'In the news/Candidates/',
            aplimit     => 'max'
        );
        while($_=$i->next){
            if(!$_->{'_ok_'}){
                $api->warn("Could not retrieve pages from iterator: ".$_->{'error'}."\n");
                return 60;
            }
            $_=$_->{'title'};
            next unless /$re/;
            my ($x,$m,$y)=($1,$2,$3);
            for(my $i=1; $i<@months; $i++){
                $m=$i if $m eq $months[$i];
            }
            $links{sprintf("%04d-%02d", $y, $m)}=(($m==1)?"\n":"")."[[$_|$x]]";
        }
        my @links=();
        for my $k (sort keys %links){
            push @links, $links{$k};
        }
        my $links=join(" &bull; ", @links);
        $links=~s/^\s+|\s+$//g;
        $links="<!-- Automatically updated by AnomieBOT -->\n$links";
        my $outtxt=$intxt;
        $outtxt=~s/<!-- Start archive links -->.*<!-- End archive links -->/<!-- Start archive links -->\n$links\n<!-- End archive links -->/s;

        if($intxt ne $outtxt){
            $api->log("Updating ".$tok->{'title'});
            my $r=$api->edit($tok, $outtxt, "Updating archive page list. $screwup", 0, 1);
            if($r->{'code'} ne 'success'){
                $api->warn("Write failed on $tok->{title}: ".$r->{'error'}."\n");
                $b0rken=1;
            }
        }
    }

    # Retry if broken, otherwise go again tomorrow
    my $t=86400-(time()%86400);
    $t=300 if($b0rken && $t>300);
    return $t;
}

1;