User:Joe's Null Bot/source

From Wikipedia, the free encyclopedia
use MediaWiki::API;

   # Gently pruned from the standard exclusion code to hardcode $user and $opt
   sub allowBots {
    my($text) = @_;

    my $user = "Joe's Null Bot";

    return 0 if $text =~ /{{[nN]obots}}/;
    return 1 if $text =~ /{{[bB]ots}}/;
    if($text =~ /{{[bB]ots\s*\|\s*allow\s*=\s*(.*?)\s*}}/s){
        return 1 if $1 eq 'all';
        return 0 if $1 eq 'none';
        my @bots = split(/\s*,\s*/, $1);
        return (grep $_ eq $user, @bots)?1:0;
    }
    if($text =~ /{{[bB]ots\s*\|\s*deny\s*=\s*(.*?)\s*}}/s){
        return 0 if $1 eq 'all';
        return 1 if $1 eq 'none';
        my @bots = split(/\s*,\s*/, $1);
        return (grep $_ eq $user, @bots)?0:1;
    }
    return 1;
  }

  # Have the bot check in to see if it's run past it's "expiration date", typically of 86400 seconds
  # (that is, one day).  Mostly here to avoid ten copies of the bot running if nothing can run for 
  # ten days.
  $epoch = time();
  $listcount =0;
  $purgecount = 0;

  sub check_expirations() {
    my $secs = time() - $epoch;
    if ($secs > 86400) {
       die "Bot expired of old age.\n";
    }
    if ($purgecount > 250) {
       die "This category is looking disturbingly large. Quitting.\n";
    }
  }

  # Within a single MediaWiki call, we ask the API to make up to 5 attempts, 10 s apart, until
  # the worst-case server lag is better than 5s. 
  my $mw = MediaWiki::API->new();
  $mw->{config}->{api_url} = 'http://en.wikipedia.org/w/api.php';

  # Delay/retry parameters
 
  $mw->{config}->{max_lag}         = 5;        # Tell MediaWiki to put us off it there's a 5s+ db lag out there
  $mw->{config}->{max_lag_delay}   = 10;  # ..and to wait 10s between retries
  $mw->{config}->{max_lag_retries} = 4;    # ..and to only make 4 retries before dropping back to our code

  # Our own delay parameters
  $standardelay      = 15;  # Wait 15s or more between purge calls....
  $longdelay         = 900;  # ...if the API puts us off several times in a row, take a 15-minute break

  my $articles = null;

  # login
  while (1) { 
    if ($mw->login( { lgname => "Joe's Null Bot", lgpassword => 'REDACTED' } )) {
      last;
    }

    check_expirations();

    if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
      sleep $longdelay;   
    } else {
      die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
    } 
  } 

  # Get list of articles
  while (1) {
    check_expirations();

    $articles = $mw->list ( {
       action => 'query',
       list => 'categorymembers',
       cmtitle => 'Category:BLP articles proposed for deletion by days left',
       cmlimit => 'max'} );  

    if ($articles) { last; }

    if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
      sleep $longdelay;   
    } else {
      die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
    } 
  }

  # scan through the articles...
  foreach (@{$articles}) {
      my $thistitle = $_->{title};
      $listcount++;

      print  "T: " . $thistitle . "\n";

      while (1) {
         check_expirations();

         my $pagehash = $mw->get_page( { title => $thistitle } );
         if ($pagehash) { last; }

         if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
            sleep $longdelay;   
          } else {
            die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
          } 
      }

      sleep $standardelay;     # There's no hurry!
     
      if (allowBots($pagehash->{'*'})) {       
        $purgecount++;

        while (1) {
             check_expirations();

             # …and purge each one 
             my $apires = $mw->api( {
                 action => 'purge', titles => $thistitle, forcelinkupdate => 1} );
                      
             if ($apires) { last; }

             if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
                sleep $longdelay;   
             } else {
                die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
             } 
         }
      } else {
        print "….DENIED\n";
      }
  }

  print $purgecount . " from a total list of " . $listcount . " articles in " . (time()-$epoch) . "seconds.\n";