Wikipedia:Bots/Requests for approval/MadmanBot 8/Source code
Appearance
<?php
// Bootstrap: the PILLAR constant is defined before the framework file is
// included (presumably a guard that class.pillar.php checks -- confirm), then
// the bot is launched from its ini configuration.
define('PILLAR', 'PILLAR');
require_once 'class.pillar.php';
$pillar = Pillar::ini_launch('madmanbot.ini');

// Collect every page that contains an external link to the PubMed search
// engine. Results are requested in batches; $resume is handed to
// get_extlinksto() on every call and the loop keeps going for as long as it
// is non-NULL afterwards (presumably the call updates it by reference as a
// continuation token -- confirm against class.pillar.php).
$extlinksto = array();
$resume = NULL;
while (TRUE)
{
    try
    {
        // NOTE(review): 5000 looks like the batch size and 'http' the link
        // protocol; the meaning of the 0 argument is not visible here.
        $batch = $pillar->cursite->get_extlinksto(
            'www.ncbi.nlm.nih.gov/pubmed/', 5000, $resume, 0, 'http');
    }
    catch (PillarException $ex)
    {
        // Stop enumerating on failure and work with what has been gathered
        break;
    }
    // Newer batches are placed in front of the ones already collected
    $extlinksto = array_merge($batch, $extlinksto);
    if ($resume == NULL)
    {
        break;
    }
}
// For every enumerated PubMed abstract link: load the page that carries it,
// find the transclusions of each template used on that page, and remove a
// url parameter that merely repeats the abstract already identified by the
// transclusion's pmid parameter. Pages that end up changed are saved.

// Titles already handled, so that a page carrying several PubMed links is
// fetched and processed only once
$processed = array();
foreach ($extlinksto as $extlinkto)
{
    // Skip each external link that is not to an abstract
    if (!preg_match('|^http://www\.ncbi\.nlm\.nih\.gov/pubmed/\d+$|',
        $extlinkto['url']))
    {
        continue;
    }
    // Skip each page that has already been processed
    $title = $extlinkto['title'];
    if (isset($processed[$title]))
    {
        continue;
    }
    $processed[$title] = TRUE;
    try
    {
        $page = new Page($pillar->cursite, $title);
    }
    catch (PillarException $ex)
    {
        continue;
    }
    // Return all templates from each page, batch by batch; the loop runs
    // for as long as $continue is non-NULL after a call (presumably
    // get_templates() updates it by reference -- confirm)
    $continue = NULL;
    $templates = array();
    do
    {
        try
        {
            $result = $page->get_templates(5000, $continue);
        }
        catch (PillarException $ex)
        {
            break;
        }
        $templates = array_merge($result, $templates);
    }
    while ($continue != NULL);
    // Whether one of the transclusions in the page's text has changed
    $changed = FALSE;
    // The text with which to replace the page; fetching it is guarded like
    // every other framework call so one failing page does not end the run
    try
    {
        $text = $page->get_text();
    }
    catch (PillarException $ex)
    {
        continue;
    }
    foreach ($templates as $template)
    {
        // Remove the Template: prefix, but only when it is really there, so
        // that names from other namespaces are not truncated
        if (strncmp($template, 'Template:', 9) === 0)
        {
            $template = substr($template, 9);
        }
        // An empty name would make the pattern below match every
        // transclusion on the page
        if ($template === '' || $template === FALSE)
        {
            continue;
        }
        // Escape every regular expression metacharacter in the name, not
        // just the delimiter
        $template_ = preg_quote($template, '/');
        // Match all transclusions of the template in the CURRENT text.
        // NOTE(review): the lazy .*?\}\} stops at the first }}, so a
        // transclusion containing a nested template is only matched up to
        // the end of that nested template.
        if (!preg_match_all('/\{\{(?:Template:)?' . $template_ .
            '\b.*?\}\}/is', $text, $transclusions,
            PREG_SET_ORDER | PREG_OFFSET_CAPTURE))
        {
            // No transclusions found, or the match failed
            continue;
        }
        // The match offsets refer to the text as it is right now, so the
        // running correction must start from zero for every template;
        // carrying it over from a previous template would splice at the
        // wrong position and corrupt the page
        $shift = 0;
        foreach ($transclusions as $transclusion)
        {
            // Matched wikitext and its byte offset in the text that was
            // searched
            list($old_transclusion, $position) = $transclusion[0];
            // Skip each transclusion without a pmid parameter
            if (!preg_match('/\|\s*pmid\s*=\s*(\d+)/i',
                $old_transclusion, $matches))
            {
                continue;
            }
            // Remove a url parameter that is an external link to the
            // abstract with the given pmid. An optional query string and
            // fragment are consumed in full, and the URL must run up to the
            // next parameter or the closing braces, so that pmid 123 never
            // removes part of .../pubmed/1234
            $new_transclusion = preg_replace(
                '~\|\s*url\s*=\s*http://www\.ncbi\.nlm\.nih\.gov/pubmed/' .
                $matches[1] .
                '(?:\?[^\s|}#]*)?(?:#[^\s|}]*)?\s*(?=\||\}\})~i',
                '', $old_transclusion);
            // Skip each transclusion that hasn't changed; a NULL result
            // means the replacement failed and must not blank the
            // transclusion
            if ($new_transclusion === NULL ||
                $new_transclusion === $old_transclusion)
            {
                continue;
            }
            // Note that one of the transclusions in the page's text has
            // changed
            $changed = TRUE;
            // Swap the old transclusion for the new one in the page's text
            $text = substr_replace($text, $new_transclusion,
                $position + $shift, strlen($old_transclusion));
            // Later matches of this template have moved by the number of
            // bytes just removed
            $shift += strlen($new_transclusion) -
                strlen($old_transclusion);
        }
    }
    // Skip each page in which none of the transclusions has changed
    if (!$changed)
    {
        continue;
    }
    // Edit each page
    // NOTE(review): the meaning of the trailing TRUE argument is not
    // visible here -- confirm against Page::put()
    try
    {
        $page->put($text, 'Automatically removed redundant url ' .
            'parameter per [[Wikipedia:Bots/Requests for ' .
            'approval/MadmanBot 8|approved]], [[Wikipedia:Bot ' .
            'requests#Convert_PubMed_links_to_citations_with_' .
            '.7Cpmid.3D|requested]] task.', TRUE);
    }
    catch (PillarException $ex)
    {
        // A failed save only skips this page
        continue;
    }
}
?>