Jump to content

User:HBC NameWatcherBot/source: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
H (talk | contribs)
No edit summary
H (talk | contribs)
update
Line 12: Line 12:
#use Text::DoubleMetaphone qw( double_metaphone );
#use Text::DoubleMetaphone qw( double_metaphone );


my $version_number = '0.15b';
my $version_number = '0.16b';
my $VERSION = 'HBC NameWatcherBot v'.$version_number.' - Now has REGEX and LABEL command';
my $VERSION = 'HBC NameWatcherBot v'.$version_number.' - Config loads only on change and startup now';
my(@whitelist,@blacklist,%param,$on_server,@job_list,%delayed_names);
my(@whitelist,@blacklist,%param,$on_server,@job_list,%delayed_names);
my $timing = 0;
my $timing = 0;
Line 136: Line 136:
}
}
}
}
add_job([\&check_parameters,$c],600);
unless (lc($param{'Status'}) eq 'on')
unless (lc($param{'Status'}) eq 'on')
{
{
Line 167: Line 166:
push(@whitelist,$1) if ($line =~ m|^;(.*)$|);
push(@whitelist,$1) if ($line =~ m|^;(.*)$|);
}
}
add_job([\&check_whitelist,$c],600);
}
}


Line 190: Line 188:
}
}
}
}
add_job([\&check_blacklist,$c],600);
}
}


Line 216: Line 213:
my($c,$name) = @_;
my($c,$name) = @_;
my $display_name = "'$name'";
my $display_name = "'$name'";
my $master_name = $name;
warn "Checking name: $display_name\n";
warn "Checking name: $display_name\n";
foreach my $white_word (@whitelist) {$name =~ s|$white_word||gi;} # Remove occurances of whitelisted words
foreach my $white_word (@whitelist) {$name =~ s|$white_word||gi;} # Remove occurances of whitelisted words
Line 231: Line 229:
}
}
}
}
add_job([\&bad_name,$c,$name,$ra_offenses],0) if
add_job([\&bad_name,$c,$master_name,$ra_offenses],0) if
(
(
scalar(@{$ra_offenses}) ||
scalar(@{$ra_offenses}) ||
Line 451: Line 449:
my $statement = ${$event->{'args'}}[0];
my $statement = ${$event->{'args'}}[0];
my $pattern = chr(0x03).'03(.*?)'.chr(0x03);
my $pattern = chr(0x03).'03(.*?)'.chr(0x03);
if (($statement =~ m|New user account|) && ($statement =~ "$pattern"))
add_job([\&check_name,$c,$1],0) if (($statement =~ m|New user account|) && ($statement =~ "$pattern"));
add_job([\&check_blacklist,$c],0) if ($statement =~ m|User:HBC NameWatcherBot/Blacklist|);
{
add_job([\&check_name,$c,$1],0);
add_job([\&check_whitelist,$c],0) if ($statement =~ m|User:HBC NameWatcherBot/Whitelist|);
add_job([\&check_parameters,$c],0) if ($statement =~ m|User:HBC NameWatcherBot/Control panel|);
}
}
}



Revision as of 18:32, 25 May 2007

This source is released under GFDL. Enjoy.

Note This code uses a version of the MediaWiki module that I repaired, all official versions are not functioning with the current mediawiki servers, so I fixed it. If you wish to reproduce this script you can e-mail me for the repaired mediawiki.pm file. HighInBC (Need help? Ask me) 03:37, 3 January 2007 (UTC)

# This script is released under the GFDL license
use strict;
use MediaWiki;
use Net::IRC;
use URI::Escape;
use Data::Dumper;
#use Text::DoubleMetaphone qw( double_metaphone );

my $version_number = '0.16b';
my $VERSION = 'HBC NameWatcherBot v'.$version_number.' - Config loads only on change and startup now';
my(@whitelist,@blacklist,%param,$on_server,@job_list,%delayed_names);
my $timing = 0;

## Wiki connection setup
open(PASS,'password');                  # A file with only the password, no carraige return
sysread(PASS, my $password, -s(PASS));  # No password in sourcecode.
close(PASS);
open(USER,'username');                  # A file with only the username, no carraige return
sysread(USER, my $username, -s(USER));  #
close(USER);
warn "Connecting to Wikipedia...\n";
my $c                  =   MediaWiki->new;
$c->setup
                        ({
                          'bot' => {'user' => $username,'pass' => $password},
                          'wiki' => {'host' => 'en.wikipedia.org','path' => 'w'}
                        }) || die "Failed to log in\n";
my $whoami              =  $c->user();
warn "$whoami v$version_number connected\n";

## IRC connection setup

my $server              = 'irc.wikimedia.org';		# The irc server your bot is to reside
my $port                = 6667;				# The port of above mentioned server
my $bot_name            = 'HBC_UWBot';			# The nick for the bot
my @channels            = ('#en.wikipedia');		# The channel(s) to connect to
my $reconnect_delay     = 30;				# Time to wait after disconnect to reconnect (In Seconds)
my $irc = new Net::IRC;					# Net::IRC our master IRC object
warn "Connecting to IRC...\n";
my $conn = $irc->newconn(				#|Connect to the server
                         Nick           => $bot_name,	#|
                         Server         => $server,	#|
                         Port           => $port,	#|
                        );
$conn->add_handler   ('endofmotd'	=> \&on_connect);	# Set off by a connect to server
$conn->add_handler   ('disconnect'	=> \&on_disconnect);	# Set off when connection to server is lost
$conn->add_handler   ('public'		=> \&on_public); 	# Set off by public messages

## General program setup

my(%parameter_expected) =
 (
  'Status'              => 1,
  'Default target'      => 1,
  'Long name'           => 1,
  'Repeating characters' => 1,
  'Repeating numbers' => 1,
  'Write rate'          => 1,
  'Recheck duration'    => 1,
  'Recheck interval'    => 1,
 );
my(@parameter_required) =
 (
  'Status',
  'Default target',
 );
my(%default_params) =
 (
  'Write rate'          => 10,
  'Long name'           => 0,
  'Repeating characters' => 0,
  'Repeating numbers' => 0,
 );

add_job([\&check_parameters,$c],0);
add_job([\&check_whitelist,$c],0);
add_job([\&check_blacklist,$c],0);
add_job([\&debug_report],5);
#add_job([\&check_name,$c,''],6); # for testing strings

## Program loop

while (1)                               # Infinite loop, a serpent biting it's own tail.
  {
  sleep(.1);                             # Important in all infinite loops to keep it calm
  $irc->do_one_loop();
  my (@kept_jobs);                      # A place to put jobs not ready to run yet
  while (my $job = shift(@job_list))    # Go through each job pending
    {
    my($r_job , $timing) = @{$job};
    if ($timing < time())               # If it is time to run it then run it
      {
      if (ref($r_job) eq 'ARRAY')       # Callback style, reference to an array with a sub followed by paramaters
        {
        my $cmd = shift(@{$r_job});
        &{$cmd}(@{$r_job});
        }
      elsif (ref($r_job) eq 'CODE')     # Otherwise just the reference to the sub
        {
        &{$r_job};
        }
      }
    else                                # If it is not time yet, save it for later
      {
      push(@kept_jobs , $job)
      }
    }
  push (@job_list , @kept_jobs);        # Keep jobs that are still pending
  }

sub add_job     # Command to add a job to the queue
  {
  my ($r_job , $timing) = @_;
  push (@job_list , [$r_job , (time()+$timing)]);
  }

#___________________Event_loop_triggered_subs___________________#

sub check_parameters
  {
  warn "Checking parameters...\n";
  my $c = shift;
  AGAIN:
  my $page = $c->get('User:HBC NameWatcherBot/Control panel','r')->content();
  %param = ();
  foreach my $line (split("\n",$page))
    {
    if ($line =~ m|^;(.*?):(.*)$|)
      {
      $param{$1} = $2 if ($parameter_expected{$1});
      }
    }
  unless (lc($param{'Status'}) eq 'on')
    {
    warn "Bot is not turned on in the control panel\n" ;
    warn "\tWaiting 1 minute and trying again";
    sleep 60;
    goto AGAIN;
    }
  foreach my $key (@parameter_required)
    {
    unless ($param{$key})
      {
      warn "Missing required parameter: $key\n";
      warn "\tWaiting 1 minute and trying again";
      sleep 60;
      goto AGAIN;
      }
    }
  %param = (%default_params,%param);
  }

sub check_whitelist
  {
  warn "Checking whitelist...\n";
  my $c = shift;
  my $page = $c->get('User:HBC NameWatcherBot/Whitelist','r')->content();
  @whitelist = ();
  foreach my $line (split("\n",$page))
    {
    push(@whitelist,$1) if ($line =~ m|^;(.*)$|);
    }
  }

sub check_blacklist
  {
  warn "Checking blacklist...\n";
  my $c = shift;
  my $page = $c->get('User:HBC NameWatcherBot/Blacklist','r')->content();
  @blacklist = ();
  foreach my $line (split("\n",$page))
    {
    if ($line =~ m|^;(.*?)(:(.*))?$|)
      {
      my $rh_entry = {};
      ${$rh_entry}{word} = $1;
      if ($2)
        {
        my @flags = split(',',$3);
        ${$rh_entry}{flags} = parse_flags(\@flags);
        }
      push(@blacklist,$rh_entry);
      }
    }
  }

sub parse_flags
  {
  my $ra_flags = shift;
  my %has_flag;
  foreach my $flag (@{$ra_flags})
    {
    if ($flag =~ m|(.*)\((.*)\)|)
      {
      my (@params) = split('\|',$2);
      $has_flag{$1} = \@params;
      }
    else
      {
      $has_flag{$flag} = 1
      }
    }
  return \%has_flag;
  }

sub check_name
  {
  my($c,$name) = @_;
  my $display_name = "'$name'";
  my $master_name = $name;
  warn "Checking name: $display_name\n";
  foreach my $white_word (@whitelist) {$name =~ s|$white_word||gi;} # Remove occurances of whitelisted words
  my $ra_offenses = [];
  foreach my $rh_black_word (@blacklist)
    {
    my $word = ${$rh_black_word}{word};
    if (${$rh_black_word}{flags}{'REGEX'})
      {
      push(@{$ra_offenses},$rh_black_word) if ($name =~ m|$word|i);
      }
    else
      {
      push(@{$ra_offenses},$rh_black_word) if (index(lc($name),lc($word)) > -1);
      }
    }
  add_job([\&bad_name,$c,$master_name,$ra_offenses],0) if
        (
         scalar(@{$ra_offenses}) ||
         (($param{'Long name'}) && (length($name) > $param{'Long name'})) ||
         (($param{'Repeating characters'}) && (detect_repeatition($name,$param{'Repeating characters'}))) ||
         (($param{'Repeating numbers'}) && (detect_repeatition_numbers($name,$param{'Repeating numbers'})))
        );
  return 1;
  }

sub bad_name
  {
  my ($c, $name, $ra_offenses) = @_;
  my(@offense_reports) = ('* {{userlinks|'.$name.'}}');
  my $need_rechecking = 0; # default no reason to report
  my $need_header = 0;
  my $has_edited = user_has_edited($c,$name);
  my $is_blocked = is_blocked($c,$name);
  my $target_page = $param{'Default target'};
  push(@offense_reports,':* This user has edited at least one time.') if ($has_edited);
  if (($param{'Long name'}) && (length($name) > $param{'Long name'}))
    {
    push(@offense_reports, ":* At ".length($name)." characters, this name exceeds ".$param{'Long name'}." characters");
    }
  if (($param{'Repeating characters'}) && (my $char = detect_repeatition($name,$param{'Repeating characters'})))
    {
    push(@offense_reports, ":* This name has the character \"'''".$char."'''\" repeating ".$param{'Repeating characters'}." or more than times in a row.");
    }
  if (($param{'Repeating numbers'}) && (detect_repeatition_numbers($name,$param{'Repeating numbers'})))
    {
    push(@offense_reports, ":* This name has ".$param{'Repeating numbers'}." or more numbers in a row.");
    }
  foreach my $rh_blackword (@{$ra_offenses})
    {
    my $rh_has_flag = ${$rh_blackword}{flags};
    my $label = ((${$rh_has_flag}{'LABEL'}) ? ("called '''".${${$rh_has_flag}{'LABEL'}}[0]."'''") : ("'''${$rh_blackword}{word}'''"));
    my $name_pattern = '('.${$rh_blackword}{word}.')';
    my $match;
    $name =~ m|$name_pattern|i;
    $match = " The portion that matched was '''$1'''." if ($1);
    my(@report) = (((${$rh_has_flag}{'REGEX'}) ? (":* Matches the regular expression $label.$match") : (":* Matches the literal pattern $label.")));
    if  (${$rh_has_flag}{'WAIT_TILL_EDIT'})
      {
      if ($has_edited)
        {
        push (@report, "::* This report was delayed until the user edited.");
        delete($delayed_names{$name});
        }
      else
        {
        warn "\t$name has not edited, not counting match to the string $label at this point, rechecking in $param{'Recheck interval'} seconds.\n";
        $need_rechecking = 1 unless (scalar(@offense_reports) > 1);
        next;
        }
      }
    $need_rechecking = 0;
    warn Dumper
     ({
      'Name'	=> $name,
      'Flags'	=> $rh_has_flag,
      'Word'	=> ${$rh_blackword}{'word'},
     });
    if  (${$rh_has_flag}{'ALTERNATE_TARGET'})
      {
      $target_page = ${${$rh_has_flag}{'ALTERNATE_TARGET'}}[0];
      warn "Alternate target: $target_page\n";
      }
    if  (${$rh_has_flag}{'USE_HEADER'})
      {
      $need_header = 1;
      }
    if  (${$rh_has_flag}{'LOW_CONFIDENCE'})
      {
      push (@report, "::* The string $label is prone to false positives, please take extra care insuring this name is actaully a violation before blocking.");
      }
    if  (${$rh_has_flag}{'NOTE'})
      {
      push (@report, "::* The string $label has a comment associated with it: ".${${$rh_has_flag}{'NOTE'}}[0]);
      }
    if  (${$rh_has_flag}{'SOCK_PUPPET'})
      {
      push (@report, "::* The string $label is often associate with sock puppets of [[User:".${${$rh_has_flag}{'SOCK_PUPPET'}}[0]."]] {{#ifexist:Category:Suspected Wikipedia sockpuppets of ".${${$rh_has_flag}{'SOCK_PUPPET'}}[0]."|(see [[:Category:Suspected Wikipedia sockpuppets of ".${${$rh_has_flag}{'SOCK_PUPPET'}}[0]."]])}}");
      }
    push (@offense_reports,@report);
    }
  if ($need_rechecking)
    {
    $delayed_names{$name} ||= time();
    my $report_age = (time() - $delayed_names{$name});
    if (($report_age > $param{'Recheck duration'}) || ($is_blocked))
      {
      warn "Forgetting about $name, it has been $report_age seconds since creation and no edits.\n";
      warn "\tThe user is blocked you see.\n" if ($is_blocked);
      delete($delayed_names{$name});
      }
    else
      {
      warn "Scheduling recheck of $name\n";
      add_job([\&check_name,$c,$name],$param{'Recheck interval'});
      }
    return 1;
    }
  unshift(@offense_reports, "== Account [[User:$name]] created ==") if $need_header;
  my $report = (join("\n",@offense_reports)." ~~~~\n");
  warn "Writing to: $target_page\n";
  my $page = $c->get($target_page, 'rw');
  if (($page->content() =~ m|<\!-- HBC NameWatcherBot allowed -->|) && !($is_blocked))
    {
    while ($page->{'content'} !~ m|\n\n$|) {$page->{'content'} = $page->{'content'}."\n";}
    $page->{'content'} = ($page->{'content'}.$report);
    $page->{'summary'} = "Reporting [[Special:Contributions/$name|$name]] ([[Special:Blockip/$name|block]]).";
    $page->save();
    warn ($page->{'summary'}."\n");
#    warn ($page->{'content'}."\n\n\n");
    sleep($param{'Write rate'});
    warn "\n";
    }
  elsif ($is_blocked)
    {
    warn "Skipping, user is already blocked.\n";
    }
  else
    {
    warn "I don't have permission to write to '$target_page'\n";
    return;
    }
  }

sub user_has_edited
  {
  my ($c,$name) = @_;
  my $url = 'http://en.wikipedia.org/w/query.php?what=contribcounter&format=xml&titles=User:'.uri_escape($name);
  ($c->{ua}->get($url)->content() =~ m|<count>(\d+)</count>|) || return 0;
  return ($1 > 0);
  }

sub detect_repeatition
  {
  my $string = shift;
  my $limit = shift;
  my $last_char;
  my $current_rep = 1;
  while (my $char = substr($string,0,1,''))
    {
    if ($char eq $last_char)
      {
      $current_rep++;
      }
    else
      {
      $last_char = $char;
      $current_rep = 1;
      }
    return $char if ($current_rep >= $limit);
    }
  return 0;
  }

sub detect_repeatition_numbers
  {
  my $string = shift;
  my $limit = shift;
  my $last_char;
  my $current_rep = 1;
  while (my $char = substr($string,0,1,''))
    {
    if ($char =~ m|^[0-9]$|)
      {
      $current_rep++;
      }
    else
      {
      $current_rep = 0;
      }
    return 1 if ($current_rep >= $limit);
    }
  return 0;
  }

sub is_blocked
  {
  my ($c,$name) = @_;
  my $url = $c->{index}.'?title=Special:Ipblocklist&ip='.uri_escape($name);
  my $data = $c->{ua}->get($url)->content(); # Get blocklist info for user
  return ($data =~ m|</a>\) blocked <a href|)       # If the user is currently blocked
  }

#___________________IRC_triggered_subs___________________#

sub on_connect  # triggered when motd is done... otherwords when you connect to server.
  {
  my $self      = shift;
  my $event     = shift;
  warn "Connected to IRC server '$server:$port' as '$bot_name'.\n";
  foreach my $chan (@channels)
    {
    add_job([\&get_on_channel , $self , lc($chan)] , 0);
    }
  $on_server = 1;
  }

sub get_on_channel
  {
  my $self      = shift;
  my $chan      = shift;

  $chan = lc($chan);
  warn "Attempting to join $chan.\n";
  my $response = $self->join($chan);                                                 # try to join channel
  die("Got join response of: $response\n") unless ($response == 20);
  warn "Joined.\n\n";
  }

sub on_public
  {
  my $self      = shift;
  my $event     = shift;

  my $statement = ${$event->{'args'}}[0];
  my $pattern = chr(0x03).'03(.*?)'.chr(0x03);
  add_job([\&check_name,$c,$1],0) if (($statement =~ m|New user account|) && ($statement =~ "$pattern"));
  add_job([\&check_blacklist,$c],0) if ($statement =~ m|User:HBC NameWatcherBot/Blacklist|);
  add_job([\&check_whitelist,$c],0) if ($statement =~ m|User:HBC NameWatcherBot/Whitelist|);
  add_job([\&check_parameters,$c],0) if ($statement =~ m|User:HBC NameWatcherBot/Control panel|);
  }

sub on_disconnect # triggered when server connection is lost.
  {
  my $self      = shift;
  my $event     = shift;

  my $reason = ${$event->{'args'}}[0];
  warn "Connection lost:".$reason."\n";
  warn "Waiting $reconnect_delay seconds before reconnecting to avoid hammering.\n";
  ($on_server) = (undef , undef);
  sleep($reconnect_delay);      # Avoid getting hammer-blocked by waiting before reconnect.
  warn "Attempting to reconnect to $server:$port\n";
  until ($self->connect()){print "Retry...\n"}  # Start again from the beggining by reconnecting, uses same logfile
  }

#___________________Debug___________________#


sub debug_report
  {
  warn Dumper
   ({
#    'Whitelist'        => \@whitelist,
#    'Blacklist'        => \@blacklist,
    'Params'	=> \%param,
    'Delayed'   => \%delayed_names,
   });
  add_job([\&debug_report],300);
  }