Jump to content

User:AnomieBOT/source/tasks/ACNClerk.pm: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
AnomieBOT (talk | contribs)
Updating published sources: ACNClerk: * Comment out excessively verbose comments. TagDater: * Minor change, this seems better.
AnomieBOT (talk | contribs)
Updating published sources: ACNClerk: * Don't change active discussions to "Archived discussion".
Line 174: Line 174:
my $new=($old=~/^WP:|^Wikipedia:/i)?($WPmap{$anchor}//''):($WTmap{$anchor}//'');
my $new=($old=~/^WP:|^Wikipedia:/i)?($WPmap{$anchor}//''):($WTmap{$anchor}//'');
if($new){
if($new){
$outtxt=~s/\[\[\Q$old\E\|((?:''')?)(?:Discussion|Discuss|Discuss (?:this|announcement|report))\g{1}\]\]/$1\[[$new#$a|Archived discussion]]$1/g;
if($new=~m{/Archive \d+$}){
$outtxt=~s/\[\[\Q$old\E\|((?:''')?)(?:Discussion|Discuss|Discuss (?:this|announcement|report))\g{1}\]\]/$1\[[$new#$a|Archived discussion]]$1/g;
$outtxt=~s/\[\[\Q$old\E(?=\||\]\])/[[$new#$a/g;
$outtxt=~s/\[\[\Q$old\E(?=\||\]\])/[[$new#$a/g;
}
} else {
} else {
$api->warn("No mapping for $anchor in $title");
$api->warn("No mapping for $anchor in $title");

Revision as of 02:54, 21 January 2011

package tasks::ACNClerk;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    ACNClerk
BRFA:    Wikipedia:Bots/Requests for approval/AnomieBOT 51
Status:  In trial
Created: 2011-01-13

Update crosslinks on [[WP:ACN]] and [[WT:ACN]] when content is archived.

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

use Data::Dumper;

# Constructor: delegate object creation to the parent class (called with no
# arguments), then make sure the result is blessed into the class that was
# actually requested.
sub new {
    my ($class)=@_;
    my $obj=$class->SUPER::new();
    return bless($obj, $class);
}

=pod

=for info
Approval requested<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 51]]

=cut

# Report this task's approval status to the bot framework.
# NOTE(review): the meaning of the value 2 is defined by the framework, not
# by this file; the accompanying POD describes the task as "Approval
# requested" / in trial -- confirm against AnomieBOT::Task.
sub approved {
    my $status=2;
    return $status;
}

# Build a map from section anchor to the title of the page that holds that
# section, using the section lists cached in $api->store under "toc <title>".
#
# When the same anchor appears on several pages, the page with the highest
# "/Archive N" number wins; a page without an archive suffix (the live
# noticeboard) outranks every archive.
#
# Parameters: $api - bot API object providing ->store
#             @titles - page titles to include
# Returns: a flat anchor => title list suitable for assigning to a hash.
sub _build_anchor_map {
    my ($api, @titles)=@_;
    my %map=();
    foreach my $title (@titles) {
        foreach my $sec (@{$api->store->{"toc $title"} // []}) {
            my $anchor=$sec->{'anchor'};
            my $winner=$title;
            if(exists($map{$anchor})){
                my $prev=$map{$anchor};
                # 1e100 stands in for "no archive number", i.e. the live page.
                my $n1=($title=~/\/Archive (\d+)$/)?$1:1e100;
                my $n2=($prev=~/\/Archive (\d+)$/)?$1:1e100;
                $winner=($n1>=$n2)?$title:$prev;
            }
            $map{$anchor}=$winner;
        }
    }
    return %map;
}

# Main entry point of the task.
#
# Steps:
#  1. List the noticeboard, its talk page and their "/Archive N" subpages,
#     and pick those whose latest revision differs from the one recorded in
#     $api->store ("revid <title>").
#  2. Reload and cache the section list ("toc <title>") of each such page.
#  3. Build anchor => page maps for the project and talk namespaces.
#  4. In each changed page, repoint section links whose target section now
#     lives on an archive page, and relabel "Discuss..." links as
#     "Archived discussion".
#
# A page's revision id is recorded only after step 4 completed for it, so a
# page whose processing failed (API error, failed edit) is retried on a later
# run instead of being silently skipped until its next revision.
#
# NOTE(review): only pages that changed are scanned for links. A page that
# did not change keeps links to sections that were archived elsewhere until
# it is next edited -- confirm whether that is intended.
#
# Parameters: $self - task object; $api - bot API object.
# Returns: the value from check_trial if it returns one; 0 when the bot is
# halting; 60 after a failed API query; 300 when the task is shut off;
# otherwise 14400. (Presumably seconds until the next run -- confirm against
# the framework.)
sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('ACNClerk', 0, 10, qw(d::Timestamp d::Redirects d::Talk d::Trial));

    $res=$api->check_trial(1295722543,'AnomieBOT 51');
    return $$res if $res;

    # Figure out which pages need re-scanning
    my %scan=();
    my @WPpages=();
    my @WTpages=();
    my $iter=$api->iterator(
        generator    => 'allpages',
        gapnamespace => [4,5],
        gapprefix    => 'Arbitration Committee/Noticeboard',
        gaplimit     => 'max',
        prop         => 'info',
    );
    while(my $p=$iter->next){
        my $title=$p->{'title'};
        # The prefix search also returns unrelated subpages; keep only the
        # noticeboard itself and its numbered archives.
        next unless $title=~/^Wikipedia(?: talk)?:Arbitration Committee\/Noticeboard(?:\/Archive \d+)?$/;
        push @WPpages, $title if $iter->iterval==4;
        push @WTpages, $title if $iter->iterval==5;
        $scan{$title}=$p->{'lastrevid'} unless $p->{'lastrevid'}==($api->store->{"revid $title"}//0);
    }

    # Load the headers for the needed pages
    foreach my $title (keys %scan) {
        return 0 if $api->halting;
        $res=$api->query(
            action => 'parse',
            title  => $title,
            text   => "__TOC__\n{{:$title}}",
            prop   => 'sections',
        );
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to retrieve section list for $title: ".$res->{'error'});
            return 60;
        }
        my @toc=();
        foreach my $sec (@{$res->{'parse'}{'sections'}}) {
            push @toc, { line => $sec->{'line'}, anchor => $sec->{'anchor'} };
        }
        $api->store->{"toc $title"}=\@toc;
        # "revid $title" is deliberately NOT recorded here; see below.
    }

    # Construct the mappings
    my %WPmap=_build_anchor_map($api, @WPpages);
    my %WTmap=_build_anchor_map($api, @WTpages);

    # Scan the pages looking for things to replace
    my $re=join('|', 
        '(?i:Wikipedia|WP):Arbitration[ _]Committee/Noticeboard',
        '(?i:Wikipedia|WP):AC/N',
        '(?i:Wikipedia|WP):ACN',
        '(?i:Wikipedia|WP):ARBCOM/N',
        '(?i:Wikipedia|WP):ARB/N',
        '(?i:Wikipedia|WP):ARBN',
        '(?i:Wikipedia[ _]talk|WT):Arbitration[ _]Committee/Noticeboard',
        '(?i:Wikipedia[ _]talk|WT):AC/N',
        '(?i:Wikipedia[ _]talk|WT):ACN',
    );
    # Captures "Page#fragment" of a wikilink, stopping before "|" or "]]".
    $re=qr/\[\[((?:$re)#[^\x5d\x7c\r\n]+)(?=\||\]\])/;
    foreach my $title (keys %scan) {
        return 0 if $api->halting;
        my $tok=$api->edittoken($title);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for $title: ".$tok->{'error'});
            return 60;
        }
        my $intxt=$tok->{'revisions'}[0]{'*'};

        # Each distinct link only needs to be looked up and rewritten once;
        # the substitutions below are global.
        my %seen=();
        my @links=grep { !$seen{$_}++ } ($intxt=~/$re/g);
        unless(@links){
            # Nothing to rewrite: no need to ask the API to encode anchors.
            $api->store->{"revid $title"}=$scan{$title};
            next;
        }

        # Have MediaWiki encode every link fragment the same way it encodes
        # section anchors, so they can be compared with the TOC data.
        my $txt='';
        for my $i (0..$#links){
            (my $frag=$links[$i])=~s/^[^#]*#//;
            $txt.="$i:{{anchorencode:$frag}}\n";
        }
        $res=$api->query(action=>'expandtemplates',text=>$txt);
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to retrieve anchor mapping for $title: ".$res->{'error'});
            return 60;
        }
        my $outtxt=$intxt;
        foreach my $line (split /\n/, $res->{'expandtemplates'}{'*'}) {
            # An empty encoded anchor is accepted (and reported as unmapped
            # below) rather than aborting the whole run.
            unless($line=~/^(\d+):(.*)$/ && $1 < scalar(@links)){
                $api->warn("Invalid response checking anchor mapping in $title");
                return 60;
            }
            my ($old,$anchor)=($links[$1],$2);
            (my $frag=$old)=~s/^[^#]*#//;
            my $new=($old=~/^WP:|^Wikipedia:/i)?($WPmap{$anchor}//''):($WTmap{$anchor}//'');
            if($new){
                # Only rewrite links whose section now lives on an archive;
                # links to still-active discussions are left alone.
                if($new=~m{/Archive \d+$}){
                    $outtxt=~s/\[\[\Q$old\E\|((?:''')?)(?:Discussion|Discuss|Discuss (?:this|announcement|report))\g{1}\]\]/$1\[[$new#$frag|Archived discussion]]$1/g;
                    $outtxt=~s/\[\[\Q$old\E(?=\||\]\])/[[$new#$frag/g;
                }
            } else {
                $api->warn("No mapping for $anchor in $title");
            }
        }

        if($intxt ne $outtxt){
            $res=$api->edit($tok, $outtxt, "Adjusting links to archived content", 1, 1);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to edit $title: ".$res->{'error'}."\n");
                # Leave the revision unrecorded so the page is retried.
                next;
            }
        }

        # The page has been fully handled at this revision.
        $api->store->{"revid $title"}=$scan{$title};
    }

    return 14400;
}

1;