Jump to content

User:Joe Decker/afcpend.pl

From Wikipedia, the free encyclopedia
use MediaWiki::Bot;
use Encode;

$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;#$
   # Gently pruned from the standard exclusion code to hardcode $user and $opt
   sub allowBots {
    my($text) = @_;

    my $user = "Joe's Null Bot";

    return 0 if $text =~ /{{[nN]obots}}/;
    return 1 if $text =~ /{{[bB]ots}}/;
    if($text =~ /{{[bB]ots\s*\|\s*allow\s*=\s*(.*?)\s*}}/s){
        return 1 if $1 eq 'all';
        return 0 if $1 eq 'none';
        my @bots = split(/\s*,\s*/, $1);
        return (grep $_ eq $user, @bots)?1:0;
    }
    if($text =~ /{{[bB]ots\s*\|\s*deny\s*=\s*(.*?)\s*}}/s){
        return 0 if $1 eq 'all';
        return 1 if $1 eq 'none';
        my @bots = split(/\s*,\s*/, $1);
        return (grep $_ eq $user, @bots)?0:1;
    }
    return 1;
  }

  # Have the bot check in to see if it's run past it's "expiration date", typically of 86400 seconds
  # (that is, one day).  Mostly here to avoid ten copies of the bot running if nothing can run for 
  # ten days.
  $epoch = time();
  $listcount =0;
  $purgecount = 0;

  sub check_expirations() {
    my $secs = time() - $epoch;
    if ($secs > 86400) {
       die "Bot expired of old age.\n";
    }
    if ($purgecount > 1500) {
       die "This category is looking disturbingly large. Quitting.\n";
    }
  }

  # Within a single MediaWiki call, we ask the API to make up to 5 attempts, 10 s apart, until
  # the worst-case server lag is better than 5s. 
  my $mw = MediaWiki::Bot->new({
    assert => 'bot',
    host => 'en.wikipedia.org',
    protocol => 'https',
    operator => "Joe's Null Bot",
    #debug => "2",
}) or die "new mwbot fail";
  $mw->{config}->{api_url} = 'https://en.wikipedia.org/w/api.php';

  # Delay/retry parameters
 
  $mw->{config}->{max_lag}         = 5;        # Tell MediaWiki to put us off it there's a 5s+ db lag out there
  $mw->{config}->{max_lag_delay}   = 10;       # ..and to wait 10s between retries
  $mw->{config}->{max_lag_retries} = 4;        # ..and to only make 4 retries before dropping back to our code

  # Our own delay parameters
  $standardelay      = 2;  # Wait 15s or more between purge calls....
  $longdelay         = 50;  # ...if the API puts us off several times in a row, take a 15-minute break

  my $articles = null;

  # login
  while (1) { 
    if ($mw->login( { username => "Joe's Null Bot", password => 'REDACTED' } )) {
      last;
    }

    check_expirations();

    if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
      sleep $longdelay;   
    } elsif ($mw->{error}->{details} =~ /Bad Gateway/) {
       print "bad gateway\n";
       sleep $longdelay;
    } else {
      die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
    } 
  } 

  # Get list of articles
  while (1) {
    check_expirations();

    $articles = $mw->list ( {
       action => 'query',
       list => 'categorymembers',
       cmtitle => 'Category:Pending AfC submissions',
       cmlimit => 'max'} );  

    if ($articles) { last; }

    if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
      sleep $longdelay;   
     } elsif ($mw->{error}->{details} =~ /Bad Gateway/) {
       print "bad gateway\n";
       sleep $longdelay;

    } else {
      die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
    } 
  }

  # scan through the articles...
  foreach (@{$articles}) {
      my $thistitle = $_->{title};
      $listcount++;

      print  "T: " . encode("iso-8859-1", $thistitle) . "\n";

      while (1) {
         check_expirations();

         my $pagehash = $mw->get_page( { title => $thistitle } );
         if ($pagehash) { last; }

         if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
            sleep $longdelay;   

         } elsif ($mw->{error}->{details} =~ /Bad Gateway/) {
            print "bad gateway\n";
            sleep $longdelay;

          } else {
            die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
          } 
      }

      sleep $standardelay;     # There's no hurry!
     
      if (allowBots($pagehash->{'*'})) {       
        $purgecount++;

        while (1) {
             check_expirations();

             # …and purge each one 
             my $apires = $mw->api( {
                 action => 'purge', titles => $thistitle, forcelinkupdate => 1} );
                      
             if ($apires) { last; }

             if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
                sleep $longdelay;   
             } elsif ($mw->{error}->{details} =~ /Bad Gateway/) {
                print "bad gateway\n";
                sleep $longdelay;
             } else {
                die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
             } 
         }
      } else {
        print "….DENIED\n";
      }
  }

  my $apires = $mw->api( { action => 'purge', titles => "Category:AfC pending submissions by age"} );
                      
  if ($apires) { 
     print "AFCpsba: cat purged\n"; 
  } else { 

    if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
          die "AFCpsba: delayed for replag\n";
    } else {
          die "AFCpsba: " . $mw->{error}->{code} . ': ' . $mw->{error}->{details};
    }
  }

  print $purgecount . " from a total list of " . $listcount . " articles in " . (time()-$epoch) . " seconds.\n";