Jump to content

User:AnomieBOT/source/tasks/DatedCategoryCreator.pm: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
AnomieBOT (talk | contribs)
Updating published sources: DatedCategoryCreator: * Allow certain cats to have backdated dates (currently just Category:Wikipedia articles in need of updating). TagDater: * Don't check years in missingcats logic, due to the above change.
AnomieBOT (talk | contribs)
Updating published sources: DatedCategoryCreator: * Tighten limits on creating future-dated non-empty categories.
Line 48: Line 48:
my %oldcats=(
my %oldcats=(
'Wikipedia articles in need of updating' => 1
'Wikipedia articles in need of updating' => 1
);

# Categories to allow future dates, as well as the valid timespan in seconds
# (otherwise, the default is 86400)
my %futurecats=(
'Articles containing potentially dated statements' => 31622400
);
);


Line 59: Line 65:
$monthnum{$months[$i]}=$i+1;
$monthnum{$months[$i]}=$i+1;
}
}

my $curyear;


sub new {
sub new {
Line 94: Line 98:
my @t=gmtime(time()+86400);
my @t=gmtime(time()+86400);
my $curmonth=strftime('%B %Y', @t);
my $curmonth=strftime('%B %Y', @t);
$curyear=($t[5]+1900);


# Part 1: Create needed categories for the current month
# Part 1: Create needed categories for the current month
Line 180: Line 183:
return 0 if exists($skipcat{$parent});
return 0 if exists($skipcat{$parent});
}
}
return 0 if $y>$curyear;
my $dt = $futurecats{$parent} // 86400;
my @t=gmtime(time()+$dt);
return 0 if $y>$t[5]+1900;
return 0 if($y==$t[5]+1900 && $monthnum{$m}>$t[4]+1);
return 0 if(!exists($oldcats{$parent}) && $y<2004);
return 0 if(!exists($oldcats{$parent}) && $y<2004);
$txt.='}}';
$txt.='}}';

Revision as of 18:14, 13 January 2013

package tasks::DatedCategoryCreator;

=pod

=begin metadata

Bot:      AnomieBOT
Task:     DatedCategoryCreator
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 64
Status:   Approved 2012-05-30
+BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 65
+Status:   Approved 2012-06-04
Created:  2012-05-07

Create needed categories under [[:Category:Wikipedia maintenance categories sorted by month]] and [[:Category:Wikipedia categories sorted by month]].

=end metadata

=cut

use utf8;
use strict;

use POSIX;
use Data::Dumper;
use AnomieBOT::API;
use AnomieBOT::Task qw/:time bunchlist/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Parent categories whose monthly subcategories are NOT named
# "PARENT from MONTH YEAR". Maps the parent category name to the prefix that
# precedes "MONTH YEAR" in the monthly category's title.
my %catmap = (
    'Category needed'                            => 'Uncategorized from ',
    'Articles needing expert attention by month' => 'Articles needing expert attention from ',
);

# Inverse of %catmap: monthly-title prefix => parent category name.
my %rcatmap = reverse %catmap;

# Parent categories that are never processed.
my %skipcat = map { $_ => 1 } (
    'Articles with invalid date parameter in template',
);

# Parent categories that may have monthly subcategories dated before 2004.
my %oldcats = map { $_ => 1 } (
    'Wikipedia articles in need of updating',
);

# Parent categories that may have future-dated monthly subcategories, mapped
# to how far ahead (in seconds) a date may lie. Categories not listed here get
# the default look-ahead of 86400 seconds (one day).
my %futurecats = (
    'Articles containing potentially dated statements' => 366 * 86400,    # 366 days
);

# Non-config globals

# English month names, in calendar order.
my @months = qw(
    January February March     April   May      June
    July    August   September October November December
);

# Precompiled alternation matching any one month name.
my $monthre = do {
    my $alternation = join '|', @months;
    qr/$alternation/;
};

# Month name => month number (January => 1 ... December => 12).
my %monthnum = ();
@monthnum{@months} = (1 .. scalar @months);

# Constructor: builds the object via the parent class's constructor, resets
# the 'iter' slot to undef, and (re)blesses the object into the requested
# class.
sub new {
    my ($class) = @_;

    my $self = $class->SUPER::new();
    $self->{'iter'} = undef;

    return bless $self, $class;
}

=pod

=for info
Approved 2012-05-30.<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 64]]

=for info
Supplemental BRFA approved 2012-06-04.<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 65]]

=cut

# Reports this task's approval value.
# NOTE(review): the meaning of the value 3 is defined by the bot framework,
# not by this file -- confirm against AnomieBOT::Task before changing it.
sub approved {
    my $approval = 3;
    return $approval;
}

# Main entry point for the task.
#
# Part 1 creates any missing monthly category for the "current" month (as of
# one day from now) under each subcategory of the two "sorted by month" meta
# categories. Part 2 scans the categories of pages in
# Category:Articles with invalid date parameter in template and creates any
# dated category found there that has not been handled already in this run.
#
# Parameters:
#   $self - the task object
#   $api  - the AnomieBOT API object used for all queries, logging and edits
#
# Returns a number to the framework:
#   0    - the bot is halting
#   60   - an API query or iterator step failed
#   7200 - both parts completed
#   any true value propagated from make_dated_cat() (300 in this file)
# NOTE(review): these presumably are "seconds until the next run" -- confirm
# against the AnomieBOT::Task documentation.
sub run {
    my ($self, $api)=@_;
    my ($res, $iter);

    $api->task('DatedCategoryCreator',0,0,qw/d::Talk d::Redirects d::Templates/);

    # Titles already passed to make_dated_cat() during this run, so Part 2
    # does not repeat work done in Part 1 (or earlier in Part 2).
    my %did=();

    # "Current month" is computed for one day in the future.
    my @t=gmtime(time()+86400);
    my $curmonth=strftime('%B %Y', @t);

    # Part 1: Create needed categories for the current month
    for my $cat ('Wikipedia categories sorted by month', 'Wikipedia maintenance categories sorted by month'){
        $res=$api->query(
            generator    => 'categorymembers',
            gcmtitle     => "Category:$cat",
            gcmnamespace => 14,
            gcmtype      => 'subcat',
            gcmlimit     => 'max',
        );
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to get members of $cat: ".$res->{'error'}."\n");
            return 60;
        }

        # Look up the expected monthly category title for every subcategory,
        # in bunches of 500 titles per request.
        $iter=$api->iterator(
            titles => bunchlist(500, map monthly_for_cat($_->{'title'}, $curmonth), values %{$res->{'query'}{'pages'}}),
        );

        # Use a lexical instead of the global $_, which the calls made inside
        # the loop could otherwise clobber.
        while(my $p=$iter->next){
            return 0 if $api->halting;

            if(!$p->{'_ok_'}){
                $api->warn("Failed to retrieve members in ".$iter->iterval.": ".$p->{'error'}."\n");
                return 60;
            }

            # Only categories that do not exist yet need to be created
            next unless exists($p->{'missing'});

            $did{$p->{'title'}}=1;
            $res=$self->make_dated_cat($api, $p->{'title'}, 'current month');
            return $res if $res;
        }
    }

    # Part 2: Look through pages in [[:Category:Articles with invalid date
    # parameter in template]] for more categories to create
    $iter=$api->iterator(
        generator => 'categorymembers',
        gcmtitle  => 'Category:Articles with invalid date parameter in template',
        gcmlimit  => '500',
        prop      => 'categories',
        cllimit   => 'max',
    );
    while(my $p=$iter->next){
        return 0 if $api->halting;

        # Same failure handling as Part 1: an error result must not be
        # mistaken for a page that simply has no categories.
        if(!$p->{'_ok_'}){
            $api->warn("Failed to retrieve members of Category:Articles with invalid date parameter in template: ".$p->{'error'}."\n");
            return 60;
        }

        for my $c (@{$p->{'categories'} // []}) {
            # Minimal sanity check; make_dated_cat() will do more
            next unless $c->{'title'}=~/ $monthre \d{4}$/;
            next if exists($did{$c->{'title'}});
            $did{$c->{'title'}}=1;
            $res=$self->make_dated_cat($api, $c->{'title'}, 'non-empty month');
            return $res if $res;
        }
    }

    # No more pages to check for now
    return 7200;
}

# Build the title of the monthly category for a parent category.
#
#   $title - parent category title, with or without the "Category:" prefix
#   $month - the "MONTH YEAR" string to append
#
# Returns the full "Category:..." title, using the %catmap prefix when the
# parent has one and "PARENT from " otherwise. Returns an empty list (so the
# entry vanishes inside map) when the parent is listed in %skipcat.
sub monthly_for_cat {
    my ($title, $month) = @_;

    (my $name = $title) =~ s/^Category://;
    return () if exists $skipcat{$name};

    my $prefix = $catmap{$name} // "$name from ";
    return "Category:" . $prefix . $month;
}

# Create the dated category, if it seems to be sane.
#
# Parameters:
#   $self  - the task object
#   $api   - the AnomieBOT API object
#   $title - full title of the monthly category to create, expected to end in
#            "MONTH YEAR"
#   $for   - short description of why it is being created; used only in the
#            log line and the edit summary
#
# Returns:
#   0     - the category was skipped (failed a sanity check, parent missing
#           or not in a meta category, page protected/excluded/already
#           existing, token or write failure); callers continue
#   300   - the parent-category query failed or the task is shut off; callers
#           return this value from run()
#   undef - the category was created
sub make_dated_cat {
    my ($self,$api,$title,$for)=@_;

    # Figure out the parent cat, and calculate the parameters for {{Monthly
    # clean up category}}.
    my $parent=$title;
    my $txt='{{Monthly clean up category';
    $parent=~s/^Category://;

    # The title must end in "MONTH YEAR". Without this check a non-matching
    # title would leave $1/$2 undefined (or stale from an earlier match) and
    # the date checks below would run on garbage.
    return 0 unless $parent=~s/($monthre) (\d{4})$//;
    my ($m,$y)=($1,$2);

    if(exists($rcatmap{$parent})){
        # Irregularly-named category: the template needs the parent spelled out
        $parent=$rcatmap{$parent};
        $txt.="|cat=$parent";
    } else {
        return 0 unless $parent=~s/ from $//;
        return 0 if exists($skipcat{$parent});
    }

    # Reject dates later than the allowed look-ahead (one day by default,
    # longer for parents listed in %futurecats).
    my $dt = $futurecats{$parent} // 86400;
    my @t=gmtime(time()+$dt);
    my ($maxyear,$maxmonth)=($t[5]+1900, $t[4]+1);
    return 0 if $y>$maxyear;
    return 0 if($y==$maxyear && $monthnum{$m}>$maxmonth);

    # Reject dates before 2004 unless the parent explicitly allows old dates
    return 0 if(!exists($oldcats{$parent}) && $y<2004);
    $txt.='}}';

    # Check whether the parent cat actually exists and is a subcat of the meta
    # cat.
    my $res=$api->query(
        titles       => "Category:$parent",
        prop         => 'categories',
        clcategories => 'Category:Wikipedia maintenance categories sorted by month|Category:Wikipedia categories sorted by month',
    );
    if($res->{'code'} ne 'success'){
        # Other query failures in this file report ->{'error'}; fall back to
        # ->{'content'} in case that is where this response carries its text.
        my $why=$res->{'error'} // $res->{'content'} // 'unknown error';
        $api->warn("Failed to check: ".$why."\n");
        return 300;
    }
    $res=(values %{$res->{'query'}{'pages'}})[0];
    if(exists($res->{'missing'})){
        $api->warn("Did not create category $title, because parent category Category:$parent does not exist");
        return 0;
    }

    # The parent may be in neither meta category, in which case the
    # 'categories' key may be absent.
    my @parentcats=@{$res->{'categories'} // []};
    my $type;
    if(grep $_->{'title'} eq 'Category:Wikipedia maintenance categories sorted by month', @parentcats){
        $type='maintenance category';
    } elsif(grep $_->{'title'} eq 'Category:Wikipedia categories sorted by month', @parentcats){
        $type='category';
    } else {
        $api->warn("Did not create category $title, because parent category $parent is not in Category:Wikipedia maintenance categories sorted by month or Category:Wikipedia categories sorted by month");
        return 0;
    }

    # Ok, create it!
    my $tok=$api->edittoken($title, EditRedir=>1);
    if($tok->{'code'} eq 'shutoff'){
        $api->warn("Task disabled: ".$tok->{'content'}."\n");
        return 300;
    }
    if($tok->{'code'} eq 'pageprotected' || $tok->{'code'} eq 'botexcluded'){
        # Skip protected and excluded pages
        return 0;
    }
    if($tok->{'code'} ne 'success'){
        $api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
        return 0;
    }

    # Never overwrite a page that already exists
    return 0 unless exists($tok->{'missing'});

    $api->log("Creating monthly dated $type for $for in $title");
    my $r=$api->edit($tok, $txt, "Creating monthly dated $type for $for", 1, 1);
    if($r->{'code'} ne 'success'){
        $api->warn("Write failed on $title: ".$r->{'error'}."\n");
        return 0;
    }

    # Callers test the result with "return $res if $res", so a false value
    # means "carry on with the next category".
    return undef;
}

1;