Jump to content

User:Cyberpower678/ListGen.php

From Wikipedia, the free encyclopedia
<?php
/* Created by Cyberpower678 */

//Edit these to suit the criteria for eligibility
$registrationTimestamp = "20221001000000";  //When the user needs to have registered by
$votingStart = "20221129000000";            //When the voting period starts
$votingEnd = "20221212235959";              //When the voting period ends
$editCountEndTimestamp = "20221101000000";  //The timestamp of when the edit count requirements need to be fulfilled
$inactivityPeriod = "$editCountEndTimestamp - 1 year";    //When the activity period ends
$editCountRequirement = 150;    //Overall edit count requirement
$qualifyingNamespaces = [ 0 ];    //Leave empty for all namespaces
$activityEditCount = 10;        //Edits required within the activity as defined by inactivity value
$activityNamespaces = [];       //Leave empty for all namespaces

//Users to exclude that are a member of this category
$excludeCategoryMembers = [
	'Wikipedia_alternative_accounts',
	'Wikipedia_doppelganger_accounts',
	'Deceased_Wikipedians',
	'Users_who_do_not_wish_to_receive_ACE_messages'
];

$forcedCategoryExclusions = [];

//Don't edit this
if( !isset( $argv[1] ) || !isset( $argv[2] ) || !isset( $argv[3] ) || !isset( $argv[4] ) || !isset( $argv[5] ) || !isset( $argv[6] ) ) {
	echo "Missing operational arguments\n";
	exit( 1 );
}

//Location of DB within WMF Cloud
$dbHost = "enwiki.labsdb";
$dbDB = "enwiki_p";
$dbPort = 3306;

//Don't edit these
$ini = parse_ini_file( "{$_SERVER['HOME']}/replica.my.cnf" );

$dbUser = $ini['user'];
$dbPass = $ini['password'];

unset( $ini );

$removeBlocked = (bool) (int) $argv[1];
$removeCategories = (bool) (int) $argv[2];
$removeInactive = (bool) (int) $argv[3];
$securePoll = (bool) (int) $argv[4];
$removeBots = (bool) (int) $argv[5];
$removeVanished = (bool) (int) $argv[6];

if( $removeBots ) {
	$forcedCategoryExclusions[] = 'All_Wikipedia_bots';
}

$dbObject = mysqli_connect( $dbHost, $dbUser, $dbPass, $dbDB, $dbPort );

if( $dbObject === false ) {
	echo "Unable to connect to the DB, please verify connections settings.\n";
	echo "Error details: ";
	echo mysqli_errno( $dbObject );
	echo " - ";
	echo mysqli_error( $dbObject );
	exit( 1 );
}

$offset = 0;
$qualifyingUsersSP = [];
$qualifyingUsersMM = [];

$batchSize = 10000;

$scriptStart = time();

$inactivityPeriod = strtotime( $inactivityPeriod );
$editCountEndTimestamp = strtotime( $editCountEndTimestamp );

$timeStart = time();

//Let's get busy with actually populating a list
echo "Counting the number of users to check...\n";

//Get a total count of users being thoroughly vetted.  Ideal for tracking the script's total progress.
$countSQL = "Select count(*) as userCount
									from user
									where user_id > 0
									  AND (user_registration < $registrationTimestamp OR user_registration is null)
									  AND user_editcount >= $editCountRequirement ";
//If desired, filter out users blocked through the end of the voting period
if( $removeBlocked ) $countSQL .= "   AND user_id not in (select ipb_user
									                      from ipblocks
									                      where ipb_user > 0
									                        AND ipb_sitewide = 1
									                        AND (ipb_expiry > '$votingEnd' OR ipb_expiry = 'infinity')) ";
//If desired, filter out bot flagged accounts
if( $removeBots ) $countSQL .= "AND user_id not in (select ug_user from user_groups where ug_group = 'bot')";
$countSQL .= ";";

$res = mysqli_query( $dbObject, $countSQL );
//$res = true;
if( $res ) {
	$result = mysqli_fetch_assoc( $res );
	//$result['userCount'] = 171946;

	echo "Found {$result['userCount']} potentially qualifying users...\n";

	$totalUserCount = $result['userCount'];

	mysqli_free_result( $res );
	unset( $result, $res );
} else {
	echo "Unable to get total users\n";
	exit( 1 );
}

$processedUsers = 0;

//We get the job done faster by doing a multiquery.  Get maximum size of string we can send to the DB.
$res = mysqli_query( $dbObject, "show variables like 'max_allowed_packet';" );
if( $res ) {
	$result = mysqli_fetch_assoc( $res );
	$maxQuerySize = $result['Value'];
} else {
	echo "Unable to retrieve max query size\n";
	exit( 1 );
}

do {
	if( isset( $res ) ) {
		mysqli_free_result( $res );
		unset( $res, $result );
	}
	//Pull a list of user having meet the registration deadline, and basic edit count requirements
	$userListSQL = "Select user_id, user_name, user_registration, actor_id, ";
	if( $removeInactive ) {
		$userListSQL .= "(select rev_timestamp from revision_userindex ";
		if( !empty( $activityNamespaces ) ) $userListSQL .= "join page on page_id = rev_page ";
		$userListSQL .= "where rev_actor = actor_id
						  AND rev_timestamp > " . date( "YmdHis", $inactivityPeriod ) . "
						  AND rev_timestamp < " . date( "YmdHis", $editCountEndTimestamp ) . " ";
		if( !empty( $activityNamespaces ) ) $userListSQL .= "AND page_namespace IN (" .
		                                                    implode( ", ", $activityNamespaces ) . ") ";
		$userListSQL .= "limit " . ( $activityEditCount - 1 ) . ",1) is not null AND ";
	}
	$userListSQL .= "(select rev_timestamp from revision_userindex ";
	if( !empty( $qualifyingNamespaces ) ) $userListSQL .= "join page on page_id = rev_page ";
	$userListSQL .= "where rev_actor = actor_id ";
	if( !empty( $qualifyingNamespaces ) ) $userListSQL .= "AND page_namespace IN (" .
	                                                      implode( ", ", $qualifyingNamespaces ) . ") ";
	$userListSQL .= "AND rev_timestamp < " . date( "YmdHis", $editCountEndTimestamp ) . " ";
	$userListSQL .= "limit " . ( $editCountRequirement - 1 ) . ",1) is not null as is_eligible ";
	$userListSQL .= "from user
					join actor_revision on actor_user = user_id
					where user_id > $offset
					  AND (user_registration < $registrationTimestamp OR user_registration is null)
					  AND user_editcount >= $editCountRequirement ";
	//If desired, filter out users that are at least blocked through the end of the voting period
	if( $removeBlocked ) $userListSQL .= "AND user_id not in (select ipb_user
											                      from ipblocks
											                      where ipb_user > 0
											                        AND ipb_sitewide = 1
											                        AND (ipb_expiry > '$votingEnd' OR ipb_expiry = 'infinity')) ";
	//If desired, filter out bot flagged account
	if( $removeBots ) $userListSQL .= "AND user_id not in (select ug_user from user_groups where ug_group = 'bot') ";
	//This optimizes the query for faster pagination
	$userListSQL .= "order by user_id asc limit $batchSize;";

	$res = mysqli_query( $dbObject, $userListSQL );

	$multiQuery = "";
	$multiList = [];
	while( $result = mysqli_fetch_assoc( $res ) ) {
		$offset = $result['user_id'];

		if( $result['is_eligible'] == 1 &&
		    ( $removeVanished === false ||
		        (stripos( $result['user_name'], 'vanished' ) === false && stripos( $result['user_name'], 'renamed' ) === false )
		    )
		) {
			//This directs how to format the list for the appropriate use.
			$qualifyingUsersMM[] = "User talk:{$result['user_name']}";
			$qualifyingUsersSP[] = "{$result['user_name']}@enwiki";
		}

		$processedUsers++;
	}

	//Give a progress report and an ETA on completion.
	$toEcho = "Processed: $processedUsers of $totalUserCount; ETA: ";
	$seconds = ( ( $totalUserCount / $processedUsers ) * ( time() - $timeStart ) ) - ( time() - $timeStart );
	$minutes = floor( $seconds / 60 );
	$seconds = $seconds % 60;
	$hours = floor( $minutes / 60 );
	$minutes = $minutes % 60;
	$toEcho .= "$hours:" . str_pad( $minutes, 2, "0", STR_PAD_LEFT ) . ":" . str_pad( $seconds, 2, "0", STR_PAD_LEFT );
	$toEcho .= "; Qualifying users found: " . count( $qualifyingUsersMM ) . "\n";

	echo $toEcho;

} while( mysqli_num_rows( $res ) == $batchSize );

if( $removeCategories ) {
	$forcedCategoryExclusions = array_merge( $forcedCategoryExclusions, $excludeCategoryMembers );
}

echo "Eliminating users in defined categories...\n";
//Get a list of users in the defined list of categories
$query =
	"select page_title
	from page
	       join categorylinks on page_id = cl_from
	where cl_to IN ( '" . implode( "', '", $forcedCategoryExclusions ) . "' );";

$res = mysqli_query( $dbObject, $query );

//Remove them from the final list of qualifying users
while( $result = mysqli_fetch_assoc( $res ) ) {
	$result['page_title'] = str_replace( "_", " ", $result['page_title'] );
	$index = array_search( "User talk:{$result['page_title']}", $qualifyingUsersMM );
	if( $index !== false ) {
		unset( $qualifyingUsersMM[$index] );
		unset( $qualifyingUsersSP[$index] );
	}
}

//Write results to a file in the appropriate format.  Either Mass Message, or SecurePoll
file_put_contents( 'UserListMessageList',
                   "*{{#target:" . implode( "}}\n*{{#target:", $qualifyingUsersMM ) . "}}\n"
);
if( $securePoll === true ) file_put_contents( 'UserListSecurePoll', implode( "\n", $qualifyingUsersSP ) . "\n" );

//Output run stats
$seconds = time() - $scriptStart;
$minutes = floor( $seconds / 60 );
$seconds = $seconds % 60;
$hours = floor( $minutes / 60 );
$minutes = $minutes % 60;
$toEcho = "$hours:" . str_pad( $minutes, 2, "0", STR_PAD_LEFT ) . ":" . str_pad( $seconds, 2, "0", STR_PAD_LEFT );

echo "Script finished.  Final user count: " . count( $qualifyingUsersMM ) . "; Execution time: $toEcho\n";

//We're done.  Let's do it again sometime. :-)