Jump to content

Wikipedia:Bots/Requests for approval/MadmanBot 8/Source code

From Wikipedia, the free encyclopedia
<?php

// Bootstrap: declare the PILLAR constant, load the Pillar framework and start 
// a session from the bot's INI file. (The constant is presumably a guard that 
// class.pillar.php checks for -- TODO confirm against the library.)
define('PILLAR', 'PILLAR');
require_once 'class.pillar.php';
$pillar = Pillar::ini_launch('madmanbot.ini');

// Gather every page carrying an external link into the PubMed search engine. 
// The same $offset variable is handed to get_extlinksto() on every request; 
// it is presumably updated by reference as the continuation marker and left 
// empty once the listing is exhausted -- TODO confirm in class.pillar.php.
$offset = NULL;
$extlinksto = array();
while (TRUE)
{
	try
	{
		$batch = $pillar->cursite->get_extlinksto(
			'www.ncbi.nlm.nih.gov/pubmed/', 5000, $offset, 0, 
			'http');
	}
	catch (PillarException $ex)
	{
		// Best effort: stop paging and carry on with whatever has 
		// been collected so far
		break;
	}

	// Each new batch is placed in front of the earlier ones
	$extlinksto = array_merge($batch, $extlinksto);

	// No continuation marker left, so the listing is complete
	if ($offset == NULL)
	{
		break;
	}
}

/**
 * Remove a redundant PubMed url parameter from a single transclusion.
 *
 * Used as the callback for preg_replace_callback(). The url parameter is 
 * removed only when the transclusion also has a pmid parameter and the url 
 * points at the PubMed abstract with exactly that pmid (an optional query 
 * string and/or fragment on the url is removed with it).
 *
 * @param array $match Regular expression match; $match[0] is the full text 
 *                     of the transclusion
 * @return string The transclusion without the redundant url parameter, or 
 *                the unchanged transclusion when nothing qualifies
 */
function madmanbot8_strip_pubmed_url($match)
{
	$transclusion = $match[0];

	// Leave each transclusion without a pmid parameter alone
	if (!preg_match('/\|\s*pmid\s*=\s*(\d+)/i', $transclusion, $pmid))
	{
		return $transclusion;
	}

	// (?!\d) stops pmid 123 from matching the front of .../pubmed/12345. 
	// The query string and fragment are consumed in full, and the 
	// lookahead requires the parameter to end at the next pipe or at the 
	// closing braces so that no partial value is ever left behind.
	$stripped = preg_replace('~\|\s*url\s*=\s*' . 
		'http://www\.ncbi\.nlm\.nih\.gov/pubmed/' . 
		$pmid[1] . '(?!\d)' . 
		'(?:\?[^\s|}#]*)?(?:#[^\s|}]*)?' . 
		'\s*(?=\||\}\})~i', 
		'', $transclusion);

	// preg_replace() returns NULL on a PCRE error; keep the original then
	return ($stripped === NULL) ? $transclusion : $stripped;
}

/**
 * Remove redundant PubMed url parameters from all transclusions of the given 
 * templates in a page's text.
 *
 * @param string $text      The page's wikitext
 * @param array  $templates Template titles, with or without the Template: 
 *                          prefix
 * @return string The text with each redundant url parameter removed; 
 *                identical to $text when nothing needed changing
 */
function madmanbot8_strip_pubmed_urls($text, $templates)
{
	foreach ($templates as $template)
	{
		// Remove the Template: prefix from each template, but only 
		// when it is actually there
		if (strncmp($template, 'Template:', 9) === 0)
		{
			$template = (string) substr($template, 9);
		}

		// An empty name would match every transclusion on the page
		if ($template === '')
		{
			continue;
		}

		// Escape each template for use in a regular expression; 
		// preg_quote() covers every metacharacter, not just the 
		// delimiter
		$pattern = '/\{\{(Template:)?' . preg_quote($template, '/') . 
			'\b.*?\}\}/is';

		// Rewrite all transclusions of each template in one pass. 
		// Letting PCRE do the splicing avoids manual string offsets, 
		// which went stale as soon as a previous template had 
		// already changed the text.
		$replaced = preg_replace_callback($pattern, 
			'madmanbot8_strip_pubmed_url', $text);

		// NULL signals a PCRE error; keep the text as it was
		if ($replaced !== NULL)
		{
			$text = $replaced;
		}
	}

	return $text;
}

foreach ($extlinksto as $extlinkto)
{
	// Skip each external link that is not to an abstract
	if (!preg_match('|^http://www\.ncbi\.nlm\.nih\.gov/pubmed/\d+$|', 
		$extlinkto['url']))
	{
		continue;
	}

	try
	{
		$page = new Page($pillar->cursite, $extlinkto['title']);
	}
	catch (PillarException $ex)
	{
		continue;
	}

	// Return all templates from each page. $continue is presumably 
	// updated by get_templates() as the continuation marker -- TODO 
	// confirm in class.pillar.php.
	$continue = NULL;
	$templates = array();
	do
	{
		try
		{
			$result = $page->get_templates(5000, $continue);
		}
		catch (PillarException $ex)
		{
			break;
		}
		$templates = array_merge($result, $templates);
	}
	while ($continue != NULL);

	// The current text of each page; a failure here skips the page 
	// instead of aborting the whole run, like every other Pillar call
	try
	{
		$text = $page->get_text();
	}
	catch (PillarException $ex)
	{
		continue;
	}
	if (!is_string($text))
	{
		continue;
	}

	// The text with which to replace each page
	$new_text = madmanbot8_strip_pubmed_urls($text, $templates);

	// Skip each page in which none of the transclusions has changed
	if ($new_text === $text)
	{
		continue;
	}

	// Edit each page
	try
	{
		$page->put($new_text, 'Automatically removed redundant url ' . 
			'parameter per [[Wikipedia:Bots/Requests for ' . 
			'approval/MadmanBot 8|approved]], [[Wikipedia:Bot ' . 
			'requests#Convert_PubMed_links_to_citations_with_' . 
			'.7Cpmid.3D|requested]] task.', TRUE);
	}
	catch (PillarException $ex)
	{
		continue;
	}
}

?>