# User:Polbot/source/Judges.pl
#
# From Wikipedia, the free encyclopedia
use strict;
use Perlwikipedia;
use LWP::UserAgent;

# Command-line arguments: the last-name initial to look up at the FJC, and a
# name to start from (listed names that sort before it are ignored).
my $firstletter = shift @ARGV;
my $startat = shift @ARGV;

# When $test is true the main loop writes its first article to a local file
# and stops instead of editing the wiki.
my $test = 0;

# Earliest epoch time at which the next batch of wiki edits may be made.
my $soonest_next_op = time;

print "\nStarting polbot\n";
my $pw = Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');

print "Logging in\n";
my $login_status = $pw->login('Polbot', '(bot password)');
if ($login_status ne 0) {
	die "I can't log in.";
}

# Build the skip list: every bulleted "* [[...]]" line on the "finished" page
# contributes the text inside the link; all other lines are ignored.
print "Getting list of completed judges to skip.\n";
my $todo_list = $pw->get_text('Wikipedia:WikiProject Law/United States federal judges - finished');
my @lines = split(/\n/, $todo_list);
my @exceptions = map { /^\*\s*\[\[([^]]*)\]\]/ ? $1 : () } @lines;
my %is_exception = map { $_ => 1 } @exceptions;

# Fetch the FJC index page for this initial and collect the ids of the judges
# that still need processing.
print "Getting list of all judges starting with $firstletter\n";
my @judge_ids = ();
my $url = 'http://www.fjc.gov/servlet/tAsearch?lname=' . $firstletter;
print " $url\n";
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/6.0");
my $res = $ua->get($url);
$res->is_success or die "could not connect";
my $html = $res->content;
while ($html =~ m/<A HREF=\"\/servlet\/tGetInfo\?jid=(\d+)\">([^<]*)</g) {
	my ($thisjid, $thisname) = ($1, $2);
	next if $thisname lt $startat;       # before the requested starting point
	next if $is_exception{$thisname};    # already listed as finished
	push @judge_ids, $thisjid;
}

print scalar(@judge_ids), " judges found.\n\n";

# Main loop: for every collected FJC judge id, download the judge's FJC
# biography page, pull out name, gender, birth/death, education, career and
# judgeship data with regexes, assemble wikitext from it, and then either
# create the article (when no page with that name has text yet) or create a
# subpage under User:Polbot/fjc/ and leave a note on the existing page's talk
# page. Every processed judge is appended to the User:Polbot/fjc tracking list.
#
# Relies on the file-level $ua, $pw, $res, $html, $test and $soonest_next_op,
# and on Expand_states().
foreach my $jid (@judge_ids) {
	my $url = "http://www.fjc.gov/servlet/tGetInfo?jid=$jid";
	print "\n$jid: ";
	$res = $ua->get($url);
	die "could not connect" unless ($res->is_success);
	$html = $res->content;
	$html =~ s/\`/'/g;

	my @eds = ();
	my @jus = ();
	my @pcs = ();
	my @jcats = ();

	my $rev_name;
	my $name;
	my $last_name;
	my $art_name;
	my $persondata_name;
	my $birth_date;
	my $birth_year;
	my $birth_loc;
	my $death_date;
	my $death_year;
	my $death_loc;
	my $pronoun = "He";
	my $active = 0;
	my $wiki_out;

	# initial change: an open-ended date range ("1990-") becomes "1990-the present"
	$html =~ s/(\d)\-\<BR\>/$1-the present<BR>/;

	# extract name ("Last, First [suffix]").  The matches are checked so that a
	# page with an unexpected layout is skipped instead of being processed with
	# stale capture variables left over from an earlier match.
	unless ($html =~ m/\<FONT SIZE\=\+1 COLOR\=BLACK\>\<B\>([^\n]*?) *\<\/B\>\<\/FONT\>/m) {
		print "name not found on FJC page; skipping.\n";
		next;
	}
	$rev_name = $1;
	$rev_name =~ s/ +/ /g;
	$rev_name =~ s/\[//g;
	$rev_name =~ s/\]//g;
	unless ($rev_name =~ m/^(.*?)\, (.*?)( Jr\.| II| III| IV)?$/) {
		print "cannot parse name '$rev_name'; skipping.\n";
		next;
	}
	$last_name = $1;
	$name = "$2 $last_name" . (defined $3 ? $3 : '');
	# If a page with this title already has text, write to a subpage instead.
	if ($pw->get_text("$name") =~ /\w/) {
		$art_name = "User:Polbot/fjc/" . $name;
	} else {
		$art_name = $name;
	}
	print "==$rev_name at [[$art_name]]==\n";

	# extract gender
	if ($html =~ m/<BR><B>Gender:<\/B> Female/) {
		$pronoun = "She";
	}

	# extract birth and death info (full date if present, otherwise year only)
	if ($html =~ m/<DD>Born +(\w+) +(\d+), +(\d+)(, +in +[^<]*)?<BR>/) {
		$birth_date = "$1 $2";
		$birth_year = $3;
		$birth_loc = $4;
		$birth_loc =~ s/^, +in +//;
	} elsif ($html =~ m/<DD>Born +(\d+)( +in +[^<]*)?<BR>/) {
		$birth_year = $1;
		$birth_loc = $2;
		$birth_loc =~ s/^ +in +//;
	}
	if ($html =~ m/<BR>Died +(\w+) +(\d+), +(\d+)(, +in +[^<]*)?<BR>/) {
		$death_date = "$1 $2";
		$death_year = $3;
		$death_loc = $4;
		$death_loc =~ s/^, +in +//;
	} elsif ($html =~ m/<BR>Died +(\d+)( +in +[^<]*)?<BR>/) {
		$death_year = $1;
		$death_loc = $2;
		$death_loc =~ s/^ +in +//;
	}
	$birth_loc = Expand_states($birth_loc);
	$death_loc = Expand_states($death_loc);
	#print "birth: '$birth_date', '$birth_year', '$birth_loc'\n";
	#print "death: '$death_date', '$death_year', '$death_loc'\n";

	# Extract education; each entry is rewritten in place into a sentence
	if ($html =~ m/<BR>\s*<BR><B>Education:<\/B><BR>(.*?)<BR>\s*<BR><B>/i) {
		my $ed_string = $1;
		@eds = split(/<[Bb][Rr]><[Bb][Rr]>/, $ed_string);
		foreach my $ed (@eds) {
			if ($ed =~ m/^(.*), (.*), (\d+)$/) {
				$ed = "$pronoun received a [[$2]] from [[$1]] in $3";
			} elsif ($ed =~ m/^Read law, (\d+)$/) {
				$ed = "$pronoun [[read law]] in $1";
			} elsif ($ed =~ m/^(.*), (\d+)$/) {
				$ed = "$pronoun graduated from [[$1]] in $2";
			}
			#print " ED: $ed\n";
		}
	}

	# Extract Professional Career; most specific patterns are tried first
	if ($html =~ m/<B>Professional Career:<\/B><BR>(.*?)<BR>\s*<BR><B>/i) {
		my $pc_string = $1;
		@pcs = split(/ *<[Bb][Rr]> */, $pc_string);
		foreach my $pc (@pcs) {
			if ($pc =~ m/^Private practice, (.*?), (\d+)\-(\d+|the present)$/) {
				$pc = "$pronoun was in private practice of law in [[$1]] from $2 to $3";
			} elsif ($pc =~ m/^Private practice, (.*?), (\d+)$/) {
				$pc = "$pronoun was in private practice of law in [[$1]] in $2";
			} elsif ($pc =~ m/^Judge, (.*?), (\d+)\-(\d+|the present)$/) {
				$pc = "$pronoun was a judge to the $1 from $2 to $3";
			} elsif ($pc =~ m/^Judge, (.*?), (\d+)$/) {
				$pc = "$pronoun was a judge to the $1 in $2";
			} elsif ($pc =~ m/^U.S. (Army|Navy)(.*?), (\d+)\-(\d+|the present)$/) {
				$pc = "$pronoun was in the [[United States $1]]$2 from $3 to $4";
			} elsif ($pc =~ m/^U.S. (Army|Navy)(.*?), (\d+)$/) {
				$pc = "$pronoun was in the [[United States $1]]$2 in $3";
			} elsif ($pc =~ m/^(.*), (\d+)\-(\d+|the present)$/) {
				$pc = "$pronoun was a $1 from $2 to $3";
			} elsif ($pc =~ m/^(.*), (\d+)$/) {
				$pc = "$pronoun was a $1 in $2";
			}
			#print "PC: $pc\n";
		}
	}

	# Extract judgeships; each recognised court also contributes a category
	if ($html =~ m/<B>Federal Judicial Service:<\/B><BR>(.*?)<BR>\s*<BR>\s*<B>/si) {
		my $ju_string = $1;
		@jus = split(/ *<[Bb][Rr]><[Bb][Rr]> */, $ju_string);
		foreach my $ju (@jus) {
			if ($ju =~ s/Judge, U\. S\. District Court, ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the [[United States District Court for the $1]]. /) {
				push @jcats, "Judges of the United States District Court for the $1";
			}
			if ($ju =~ s/Justice, U\. S\. District Court for the District of Columbia \[Supreme Court of the District of Columbia\]\s*<[Bb][Rr]>/$last_name was a federal judge to the [[United States District Court for the District of Columbia]]. /) {
				push @jcats, "Judges of the United States District Court for the District of Columbia";
			}
			if ($ju =~ s/Judge, U\. S\. Circuit Courts ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the [[United States circuit court]] $1. /) {
				push @jcats, "Judges of the United States circuit courts";
			}
			if ($ju =~ s/Judge, U\. S\. Court of Appeals for District of Columbia Circuit<[Bb][Rr]>/$last_name was a federal judge to the [[United States Court of Appeals for the D.C. Circuit]]. /) {
				push @jcats, "Judges of the United States Court of Appeals for the D.C. Circuit";
			}
			if ($ju =~ s/Judge, U\. S\. Court of Appeals ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the [[United States Court of Appeals $1]]. /) {
				push @jcats, "Judges of the United States Court of Appeals $1";
			}
			# Any judgeship without a termination notice marks the judge as active.
			if ($ju !~ m/Service terminated/i) {
				$active = 1;
			}
		}
	}

	# Mash together.
	$wiki_out = "{{Cleanup FJC Bio}}\n'''$name''' ";
	if ($birth_date) {
		if ($death_date) {
			$wiki_out .= "($birth_date, $birth_year \&ndash\; $death_date, $death_year) ";
		} elsif ($death_year) {
			$wiki_out .= "($birth_date, $birth_year \&ndash\; $death_year) ";
		} else {
			$wiki_out .= "(born $birth_date, $birth_year) ";
		}
	} elsif ($birth_year) {
		if ($death_date) {
			$wiki_out .= "($birth_year \&ndash\; $death_date, $death_year) ";
		} elsif ($death_year) {
			$wiki_out .= "($birth_year\&ndash\;$death_year) ";
		} else {
			$wiki_out .= "(born $birth_year) ";
		}
	} else {
		if ($death_date) {
			$wiki_out .= "(died $death_date, $death_year) ";
		} elsif ($death_year) {
			$wiki_out .= "(died $death_year) ";
		}
	}
	if ($death_year) {
		$wiki_out .= "was a ";
	} else {
		if ($active) {
			$wiki_out .= "is a ";
		} else {
			$wiki_out .= "is a former ";
		}
	}
	$wiki_out .= "[[United States federal judge]].\n\n";
	if ($birth_loc) {
		$wiki_out .= "$last_name was born in [[$birth_loc]]. ";
	}
	foreach my $ed (@eds) {
		$wiki_out .= "$ed. ";
	}
	foreach my $pc (@pcs) {
		$wiki_out .= "$pc. ";
	}
	$wiki_out .= "\n\n";
	foreach my $ju (@jus) {
		$wiki_out .= "$ju\n\n";
	}
	if ($death_loc) {
		$wiki_out .= "$pronoun died in [[$death_loc]].\n\n";
	}
	# Persondata name: apostrophes removed, each word capitalised.
	$persondata_name = $rev_name;
	$persondata_name =~ s/\'//g;
	$persondata_name =~ s/\b(\w+)\b/\u\L$1/g;
	$wiki_out .= "==External links==\n* {{FJC Bio|$jid}}\n\n";
	$wiki_out .= '<!-- Metadata: see [[Wikipedia:Persondata]] -->' . "\n{{Persondata\n|NAME=$persondata_name\n";
	$wiki_out .= "|ALTERNATIVE NAMES=\n|SHORT DESCRIPTION=[[United States federal judge]]\n|DATE OF BIRTH=";
	if ($birth_date) {
		$wiki_out .= "$birth_date, $birth_year\n";
	} else {
		$wiki_out .= "$birth_year\n";
	}
	$wiki_out .= "|PLACE OF BIRTH=";
	if ($birth_loc) {
		$wiki_out .= "[[$birth_loc]]";
	}
	$wiki_out .= "\n|DATE OF DEATH=";
	if ($death_date) {
		$wiki_out .= "$death_date, $death_year\n";
	} else {
		$wiki_out .= "$death_year\n";
	}
	$wiki_out .= "|PLACE OF DEATH=";
	if ($death_loc) {
		$wiki_out .= "[[$death_loc]]";
	}
	$wiki_out .= "\n}}\n{{DEFAULTSORT:$rev_name}}\n";
	if ($birth_year) {
		$wiki_out .= "[[Category:$birth_year births]]\n";
	}
	if ($death_year) {
		$wiki_out .= "[[Category:$death_year deaths]]\n";
	} else {
		$wiki_out .= "[[Category:Living people]]\n";
	}
	foreach my $jcat (@jcats) {
		$wiki_out .= "[[Category:$jcat]]\n";
	}

	# Final substitutions - multiple
	$wiki_out =~ s/Nominated by /$last_name was nominated by /g;
	$wiki_out =~ s/Received a recess appointment from /$last_name received a recess appointment from /g;
	$wiki_out =~ s/Confirmed by the Senate/$pronoun was confirmed by the United States Senate/g;
	$wiki_out =~ s/vacated by (.*?);/vacated by [[$1]]./g;
	$wiki_out =~ s/Reassigned /$pronoun was reassigned on /g;
	$wiki_out =~ s/Service terminated on /$last_name<!-- -->'s service was terminated on /g;
	# Match on the judge's actual pronoun; a literal "He" here would leave the
	# sentences generated for female judges unrewritten.
	$wiki_out =~ s/\Q$pronoun\E was a State attorney general, ([^\.\;]*?) from/$pronoun was the state attorney general of $1 from/g;
	$wiki_out =~ s/ was a Member of the faculty, / was a member of the faculty of /g;
	$wiki_out =~ s/ was a Faculty, / was a member of the faculty of /g;
	$wiki_out =~ s/on (\w+ \d+, \d+), and received commission on \1/on $1, and received commission the same day/g;
	$wiki_out =~ s/(attorney|general|treasurer|secretary|senator), /$1 of /g;
	$wiki_out =~ s/ a ([AEIO])/ an $1/g;
	$wiki_out =~ s/, (\d+)\-(\d+) from / from $1 to $2 and from /g;
	$wiki_out =~ s/, (\d+) from / in $1 and from /g;

	# Final substitutions - single (only the first occurrence gets linked)
	$wiki_out =~ s/recess appointment/[[recess appointment]]/;
	$wiki_out =~ s/senior status/[[senior status]]/;
	$wiki_out =~ s/U.S. Attorney(,| from)/[[United States Attorney]]$1/;
	$wiki_out =~ s/United States Senate/[[United States Senate]]/;
	$wiki_out =~ s/\[\[J\.D\.\]\]/[[Juris Doctor|J.D.]]/;
	$wiki_out =~ s/Law clerk/[[law clerk]]/;

	# Presidents: link the first "from <name>" / "by <name>" for each president.
	# Names are matched literally (\Q...\E) so the dots are not wildcards.
	my @presidents = (
		'George W. Bush',
		'William J. Clinton',
		'George H.W. Bush',
		'Ronald Reagan',
		'Jimmy Carter',
		'Gerald Ford',
		'Richard M. Nixon',
		'Lyndon B. Johnson',
		'John F. Kennedy',
		'Dwight D. Eisenhower',
		'Harry S Truman',
		'Franklin D. Roosevelt',
		'Herbert Hoover',
		'Calvin Coolidge',
		'Warren G. Harding',
		'Woodrow Wilson',
		'William H. Taft',
		'Theodore Roosevelt',
		'William McKinley',
		'Benjamin Harrison',
		'Grover Cleveland',
		'Chester A. Arthur',
		'James A. Garfield',
		'Rutherford B. Hayes',
		'Ulysses Grant',
		'Andrew Johnson',
		'Abraham Lincoln',
		'James Buchanan',
		'Franklin Pierce',
		'Millard Fillmore',
		'Zachary Taylor',
		'James K. Polk',
		'John Tyler',
		'Martin Van Buren',
		'Andrew Jackson',
		'John Quincy Adams',
		'James Monroe',
		'James Madison',
		'Thomas Jefferson',
		'John Adams',
		'George Washington',
	);
	foreach my $president (@presidents) {
		$wiki_out =~ s/(from|by) (\Q$president\E)/$1 President [[$2]]/;
	}

	# Write
	if ($test) {
		# Test mode: dump the first generated article to a file and stop.
		print "Output to file\n";
		open(my $outfile, '>', 'judges.txt')
			or die "Cannot open judges.txt for writing: $!";
		print {$outfile} $wiki_out;
		close($outfile)
			or die "Cannot close judges.txt: $!";
		die "Test mode: stopping after the first article.\n";
	}

	# Throttle: wait until the earliest permitted time for the next batch of
	# edits.  sleep() is used rather than spinning on time(); the loop re-checks
	# because sleep may return early.
	$| = 1;
	my $wait = $soonest_next_op - time;
	$wait = 0 if $wait < 0;
	print "Waiting $wait secs... ";
	while ((my $remaining = $soonest_next_op - time) > 0) {
		sleep $remaining;
	}
	$soonest_next_op = time + 9;

	# Decide this before $name can be replaced by a redirect target below.
	my $is_new_article = ($name eq $art_name);

	if ($is_new_article) {
		$pw->edit($art_name, $wiki_out, "Auto-generating new article based on $url");
		my $talkmessage = "{{WPBiography\n|living=";
		if ($death_year) {
			$talkmessage .= 'no';
		} else {
			$talkmessage .= 'yes';
		}
		$talkmessage .= "\n|class=start\n|priority=low\n|needs-infobox=yes\n|politician-work-group=yes\n}}\n\nThis article was automatically created by a perl script. It could use a human's loving touch. ~~~~";
		$pw->edit("Talk:$art_name", $talkmessage, "Auto-adding WPbiography template");
	} else {
		# Subpage: turn category memberships into plain links to the categories.
		$wiki_out =~ s/\[\[Category/[[:Category/g;
		$pw->edit($art_name, $wiki_out, "Auto-generating subpage based on $url");
		# If the existing page is a redirect, leave the note on the target's talk page.
		my $otherpage = $pw->get_text("$name");
		if ($otherpage =~ m/\#\s*Redirect\s*\[\[\s*(.*?)\s*\]\]/is) {
			$name = $1;
		}
		my $talksofar = $pw->get_text("Talk:$name");
		$talksofar .= "\n==Bot-created subpage==\n\nA temporary subpage at [[$art_name]] was automatically created by a perl script, based on [$url this article] at the [[Biographical Directory of Federal Judges]]. The subpage should either be merged into this article, or moved and disambiguated. ~~~~\n";
		$pw->edit("Talk:$name", $talksofar, "Auto-adding link to subpage at [[$art_name]]");
	}

	# Record the result in the tracking table (second column: created as an article?).
	my $listsofar = $pw->get_text("User:Polbot/fjc");
	$listsofar .= "|-\n| $rev_name || " . ($is_new_article ? 'yes' : 'no') . " || [[$art_name]]\n";
	$pw->edit("User:Polbot/fjc", $listsofar, "Adding [[$art_name]]");

	print "Article created.\n";
}

# Expand_states($place)
#
# Returns $place with two-letter postal abbreviations (the 50 states, DC and
# PR) replaced by the full name, e.g. "Mobile, AL" -> "Mobile, Alabama".
#
# Only a code that stands alone as a whole word is expanded, so capital
# letters inside another word (e.g. the "AL" in "ALBANY") are left untouched.
# An undefined $place is returned unchanged.
sub Expand_states {
	my $place = shift;

	# Nothing to expand when no location was extracted.
	return $place unless defined $place;

	# Kept inside the sub: this sub is defined at the end of the script, so a
	# file-level table declared here would not be populated yet when the main
	# loop calls it.
	my %state_name_for = (
		'AL' => 'Alabama',
		'AK' => 'Alaska',
		'AZ' => 'Arizona',
		'AR' => 'Arkansas',
		'CA' => 'California',
		'CO' => 'Colorado',
		'CT' => 'Connecticut',
		'DE' => 'Delaware',
		'DC' => 'District of Columbia',
		'FL' => 'Florida',
		'GA' => 'Georgia',
		'HI' => 'Hawaii',
		'ID' => 'Idaho',
		'IL' => 'Illinois',
		'IN' => 'Indiana',
		'IA' => 'Iowa',
		'KS' => 'Kansas',
		'KY' => 'Kentucky',
		'LA' => 'Louisiana',
		'ME' => 'Maine',
		'MD' => 'Maryland',
		'MA' => 'Massachusetts',
		'MI' => 'Michigan',
		'MN' => 'Minnesota',
		'MS' => 'Mississippi',
		'MO' => 'Missouri',
		'MT' => 'Montana',
		'NE' => 'Nebraska',
		'NV' => 'Nevada',
		'NH' => 'New Hampshire',
		'NJ' => 'New Jersey',
		'NM' => 'New Mexico',
		'NY' => 'New York',
		'NC' => 'North Carolina',
		'ND' => 'North Dakota',
		'OH' => 'Ohio',
		'OK' => 'Oklahoma',
		'OR' => 'Oregon',
		'PA' => 'Pennsylvania',
		'PR' => 'Puerto Rico',
		'RI' => 'Rhode Island',
		'SC' => 'South Carolina',
		'SD' => 'South Dakota',
		'TN' => 'Tennessee',
		'TX' => 'Texas',
		'UT' => 'Utah',
		'VT' => 'Vermont',
		'VA' => 'Virginia',
		'WA' => 'Washington',
		'WV' => 'West Virginia',
		'WI' => 'Wisconsin',
		'WY' => 'Wyoming',
	);

	# Replace each stand-alone two-capital-letter word that is a known code;
	# unknown two-letter words are put back unchanged.
	$place =~ s/\b([A-Z]{2})\b/exists $state_name_for{$1} ? $state_name_for{$1} : $1/ge;

	return $place;
}