User:Full-date unlinking bot/code: Difference between revisions
Appearance
Content deleted Content added
Escape other use of article name in a sql statement. |
Update date processing to handle ordinal dates (January 1st), ordinal-of dates (1st of January) and full piped dates (<<January 1|1 January>>). Also revised and generalized date list processing, |
||
Line 125: | Line 125: | ||
$editsummary = "Unlinking full-dates. [[User:Full-date unlinking bot|Details here]]. Codes: "; |
$editsummary = "Unlinking full-dates. [[User:Full-date unlinking bot|Details here]]. Codes: "; |
||
//========== Define regular expression date building blocks |
//========== Define regular expression date building blocks |
||
// Key: m=month, d=day, y=year, s = ordinal suffix, n = ordinal day (1st, 2nd, ...), |
|||
// x = day with optional ordinal suffix (1, 1st, 2, 2nd, etc.), o = "of" |
|||
// |
|||
// Root date components |
// Root date components |
||
$part_m = 'Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|' |
$part_m = '(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|' |
||
. 'July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?'; |
. 'July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)'; |
||
$part_d = '\d{1,2}'; |
$part_d = '\d{1,2}'; |
||
$part_s = '(?:st|nd|rd|th)'; // Ordinal suffix |
|||
$part_n = $part_d . $part_s; // Day with ordinal suffix |
|||
$part_x = $part_d . $part_s . '?'; // Day with optional ordinal suffix |
|||
$part_y = '\d{1,4}(?:[ _]BC)?'; |
$part_y = '\d{1,4}(?:[ _]BC)?'; |
||
Line 136: | Line 142: | ||
$part_c_m = '(' . $part_m . ')'; |
$part_c_m = '(' . $part_m . ')'; |
||
$part_c_d = '(' . $part_d . ')'; |
$part_c_d = '(' . $part_d . ')'; |
||
$part_c_n = '(' . $part_n . ')'; |
|||
$part_c_x = '(' . $part_x . ')'; |
|||
$part_c_y = '(' . $part_y . ')'; |
$part_c_y = '(' . $part_y . ')'; |
||
// The following "Nth of Month" dates are not redirects to standard day articles, so are not to be unlinked |
|||
$excl_nom = '(?!1st[ _]of[ _]May|5th[ _]of[ _]May|4th[ _]of[ _]July|6th[ _]of[ _]October|8th[ _]of[ _]November)'; |
|||
// Linked captured date components |
// Linked captured date components |
||
$part_lc_md = '\[\[' . $part_c_m . '[ _]' . $part_c_d . '\]\]'; // [[month day]] |
$part_lc_md = '\[\[' . $part_c_m . '[ _]' . $part_c_d . '\]\]'; // [[month day]] |
||
$part_lc_dm = '\[\[' . $part_c_d . '[ _]' . $part_c_m . '\]\]'; // [[day month]] |
$part_lc_dm = '\[\[' . $part_c_d . '[ _]' . $part_c_m . '\]\]'; // [[day month]] |
||
$ |
$part_lc_mn = '\[\[' . $part_c_m . '[ _]' . $part_c_n . '\]\]'; // [[month Nth]] |
||
$part_lc_nm = '\[\[' . $part_c_n . '[ _]' . $part_c_m . '\]\]'; // [[Nth month]] |
|||
$part_lc_nom = '\[\[' . $excl_nom . $part_c_n . '[ _]of[ _]' . $part_c_m . '\]\]'; // [[Nth of month]], excluding select dates |
|||
$part_lc_y = '\[\[' . $part_c_y . '\]\]'; // [[year]] |
|||
// Generalized piped date components |
|||
//========== Build regular expressions for dmy, mdy, and (less common) ymd formats |
|||
$part_mx = $part_m . '[ _]' . $part_x; // Month day(th) |
|||
$part_xm = $part_x . '[ _]' . $part_m; // day(th) Month |
|||
$part_nom = $excl_nom . $part_n . '[ _]of[ _]' . $part_m; // Nth of Month, excluding select dates |
|||
$part_gen_link = '(?:' . $part_mx . '|' . $part_xm . '|' . $part_nom . ')'; // m-d(th), d(th)-m, or d(th)-of-m |
|||
$part_gen_pipe = '(?:' . $part_x . '|' . $part_mx . '|' . $part_xm . '|' . $part_nom . ')'; // d(th), m-d(th), d(th)-m, or d(th)-of-m |
|||
$part_lc_piped = '\[\[' . $part_gen_link . '\|' . '(' . $part_gen_pipe . ')' . '\]\]'; // Only the piped text is captured |
|||
// Punctuation |
|||
$part_AMreg_punct = ', '; |
$part_AMreg_punct = ', '; |
||
$part_BRreg_punct = ' '; |
$part_BRreg_punct = ' '; |
||
$part_AModd_punct = '(?!, \[)(?: *(?:, *)?)'; // spaces and optional comma, excluding comma + single space |
$part_AModd_punct = '(?!, \[)(?: *(?:, *)?)'; // spaces and optional comma, excluding comma + single space |
||
$part_BRodd_punct = '(?! \[)(?: *(?:, *)?)'; // spaces and optional comma, excluding single space |
$part_BRodd_punct = '(?! \[)(?: *(?:, *)?)'; // spaces and optional comma, excluding single space |
||
$ |
$part_c_gen_punct = '( *(?:, *)?)'; // spaces and optional comma (any form) - captured |
||
$part_YMD_punct = ' *'; // Recognize only spaces (zero or more) between year and month-day |
|||
$regex_AMreg = '/' . $part_lc_md . $part_AMreg_punct . $part_lc_y . '/i'; |
|||
$regex_BRreg = '/' . $part_lc_dm . $part_BRreg_punct . $part_lc_y . '/i'; |
|||
$regex_AModd = '/' . $part_lc_md . $part_AModd_punct . $part_lc_y . '/i'; |
|||
$regex_BRodd = '/' . $part_lc_dm . $part_BRodd_punct . $part_lc_y . '/i'; |
|||
$regex_YMD = '/' . $part_lc_y . $part_YMD_punct . $part_lc_md . '/i'; |
|||
$replace_AM = '§~§$1 $2, $3'; // "§~§" is a marker (deleted later) that supports late list processing |
|||
$replace_BR = '§~§$1 $2 $3'; |
|||
$replace_YMD = '$1 $2 $3'; |
|||
// For information and review purposes, the above expressions are equivalent to: |
|||
// $regex_AMreg = '/\[\[(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[ _](\d{1,2})\]\], \[\[(\d{1,4}(?:[ _]BC)?)\]\]/i' |
|||
// $regex_BRreg = '/\[\[(\d{1,2})[ _](Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\]\] \[\[(\d{1,4}(?:[ _]BC)?)\]\]/i' |
|||
// $regex_AModd = '/\[\[(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[ _](\d{1,2})\]\](?!, \[)(?: *(?:, *)?)\[\[(\d{1,4}(?:[ _]BC)?)\]\]/i' |
|||
// $regex_BRodd = '/\[\[(\d{1,2})[ _](Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\]\](?! \[)(?: *(?:, *)?)\[\[(\d{1,4}(?:[ _]BC)?)\]\]/i' |
|||
// $regex_YMD = '/\[\[(\d{1,4}(?:[ _]BC)?)\]\] *\[\[(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[ _](\d{1,2})\]\]/i' |
|||
//========== Define regular expressions for ISO 8601 like dates. |
|||
// Negative year forms are also recognized, but will likely never be encountered. Technically, |
|||
// ISO 8601 dates are only valid for years 1583 through 9999 of the Gregorian calendar, but |
|||
// we will not enforce those rules here. |
|||
$regex_ISO1 = '/' . '\[\[(-?\d{4}-\d{2}-\d{2})\]\]' . '/i'; // [[yyyy-mm-dd]] or [[-yyyy-mm-dd]] |
|||
$regex_ISO2 = '/' . '\[\[(-?\d{4})\]\]-\[\[(\d{2}-\d{2})\]\]' . '/i'; // [[yyyy]]-[[mm-dd]] or [[-yyyy]]-[[mm-dd]] |
|||
$replace_ISO1 = '$1'; |
|||
$replace_ISO2 = '$1-$2'; |
|||
//========== Define regular expressions to extend processing for date ranges and lists |
|||
// For date lists, build a single match and capture expression that allows any of the forms: |
|||
// [[mmm dd]], [[dd mmm]], [[mmm dd|dd]] and [[dd mmm|dd]] (and even [[mmm dd|]] and [[dd mmm|]]). |
|||
// Three values are captured -- first date part, second date part, and pipe text (or blank if not present). |
|||
// The first two date parts may be month+day, day+month, but not month+month or day+day. |
|||
$part_c_m_or_d = '(' . $part_d . '|' . $part_m . ')'; |
|||
$part_c_pipe_day = '((?:[|](?:' . $part_d . ')?)?)'; // Matches nothing, pipe + day. or pipe + empty string |
|||
$part_verify_m_and_d = '(?=[^\]|]*[A-Z])' . '(?=[^\]|]*\d)'; // Lookahead to verify m+d or d+m (not m+m or d+d) |
|||
$part_lc_general = '\[\[' . $part_verify_m_and_d . $part_c_m_or_d . '[ _]' . $part_c_m_or_d . $part_c_pipe_day . '\]\]'; |
|||
// Define words and punctuation that may appear between items of a date range or list. |
// Define words and punctuation that may appear between items of a date range or list. |
||
// Optional comma |
// Optional comma + whitespace + separator punctuation or word + more whitespace |
||
$part_list_commaopt = ',?'; |
$part_list_commaopt = ',?'; |
||
$part_list_spacing = '(?: | |<br */?>)*'; // Zero or more: Space, symbolic nb-space, line break |
$part_list_spacing = '(?: | |<br */?>)*'; // Zero or more: Space, symbolic nb-space, line break |
||
Line 201: | Line 185: | ||
$part_c_list_separator = '(' . $part_list_commaopt . $part_list_spacing . '(?:' . $part_list_word . $part_list_spacing . ')?)'; |
$part_c_list_separator = '(' . $part_list_commaopt . $part_list_spacing . '(?:' . $part_list_word . $part_list_spacing . ')?)'; |
||
// Final search expressions |
|||
// In the following expressions "§~§" and "~" are used as placeholders. They have no special meaning. |
|||
$regex_AMreg = '/' . $part_lc_md . $part_AMreg_punct . $part_lc_y . '/i'; |
|||
$regex_BRreg = '/' . $part_lc_dm . $part_BRreg_punct . $part_lc_y . '/i'; |
|||
$regex_AModd = '/' . $part_lc_md . $part_AModd_punct . $part_lc_y . '/i'; |
|||
$regex_BRodd = '/' . $part_lc_dm . $part_BRodd_punct . $part_lc_y . '/i'; |
|||
$regex_AMord = '/' . $part_lc_mn . $part_c_gen_punct . $part_lc_y . '/i'; |
|||
$regex_BRord = '/' . $part_lc_nm . $part_c_gen_punct . $part_lc_y . '/i'; |
|||
$regex_ordOf = '/' . $part_lc_nom . $part_c_gen_punct . $part_lc_y . '/i'; |
|||
$regex_piped = '/' . $part_lc_piped . $part_c_gen_punct . $part_lc_y . '/i'; |
|||
$regex_YMD = '/' . $part_lc_y . $part_YMD_punct . $part_lc_md . '/i'; |
|||
$regex_ISO1 = '/' . '\[\[(-?\d{4}-\d{2}-\d{2})\]\]' . '/i'; // [[yyyy-mm-dd]] or [[-yyyy-mm-dd]] |
|||
$regex_ISO2 = '/' . '\[\[(-?\d{4})\]\]-\[\[(\d{2}-\d{2})\]\]' . '/i'; // [[yyyy]]-[[mm-dd]] or [[-yyyy]]-[[mm-dd]] |
|||
// Note: Negative year forms are also recognized in the above patterns for ISO-8601-like dates, but |
|||
// will likely never be encountered. Technically, ISO 8601 dates are only valid for years 1583 through |
|||
// 9999 of the Gregorian calendar, but we will not enforce those rules here. |
|||
// Final replace expressions ("§~§" is a marker, deleted later, that supports date list processing) |
|||
// Define expression to search for a date list not anchored by a standard form of the mdy or |
|||
$replace_AM = '§~§$1 $2, $3'; |
|||
// dmy date. This will catch the cases where the date part immediately preceding the year |
|||
$replace_BR = '§~§$1 $2 $3'; |
|||
// is piped. Punctuation is left unchanged. |
|||
$replace_AMord = '§~§$1 $2$3$4'; // "(month) (dayth)(punct)(year)" |
|||
// Example: [[April 23]]/[[April 24|24]], [[1966]] |
|||
$replace_BRord = '§~§$1 $2$3$4'; // "(dayth) (month)(punct)(year)" |
|||
// ...will be replaced with "§~§April~23~~/§~§April~24~|24~, 1966". |
|||
$replace_ordOf = '§~§$1 of $2$3$4'; // "(dayth) of (month)(punct)(year)" |
|||
// ...which will later be cleaned up as "April 23/24, 1966" |
|||
$replace_piped = '§~§$1$2$3'; // (piped-text)(punc)(year) |
|||
$regex_list_base = '@' . $part_lc_general . $part_c_list_separator . $part_lc_general . '( *(?:, *)?)' . $part_lc_y . '@i'; |
|||
$replace_YMD = '$1 $2 $3'; |
|||
$replace_list_base = '§~§~$1~$2~$3~' . '$4' . '§~§~$5~$6~$7~' . '$8' . '$9'; |
|||
$replace_ISO1 = '$1'; |
|||
$replace_ISO2 = '$1-$2'; |
|||
// Date list search ("@" is used as a regex delimiter, since "/" is used in the expressions) |
|||
// Search for additional month-day or day-month parts to the left of already processed dates. |
|||
$ |
$regex_AMlist = '@' . $part_lc_md . $part_c_list_separator . '(?=§~§)' . '@i'; |
||
$regex_BRlist = '@' . $part_lc_dm . $part_c_list_separator . '(?=§~§)' . '@i'; |
|||
$replace_list_extend = '§~§~$1~$2~$3~' . '$4'; |
|||
$regex_AMordList = '@' . $part_lc_mn . $part_c_list_separator . '(?=§~§)' . '@i'; |
|||
$regex_BRordList = '@' . $part_lc_nm . $part_c_list_separator . '(?=§~§)' . '@i'; |
|||
$regex_ordOfList = '@' . $part_lc_nom . $part_c_list_separator . '(?=§~§)' . '@i'; |
|||
$regex_pipedList = '@' . $part_lc_piped . $part_c_list_separator . '(?=§~§)' . '@i'; |
|||
// Date list replace |
|||
// Convert intermediate date list string replacements with final unlinked form ("month day", "day month" or "day"). |
|||
$replace_AMlist = '§~§$1 $2$3'; // "(month) (day)(punct)..." |
|||
$regex_list_cleanup_nopipe = '/' . '§~§~([^~]*)~([^~]*)~[|]?~' . '/i'; |
|||
$replace_BRlist = '§~§$1 $2$3'; // "(day) (month)(punct)..." |
|||
$replace_list_cleanup_nopipe = '$1 $2'; |
|||
$replace_AMordList = '§~§$1 $2$3'; // "(month) (dayth)(punct)..." |
|||
$replace_BRordList = '§~§$1 $2$3'; // "(dayth) (month)(punct)..." |
|||
$replace_ordOfList = '§~§$1 of $2$3'; // "(dayth) of (month)(punct)..." |
|||
$replace_pipedList = '§~§$1$2'; // "(pipetext)(punct)..." |
|||
// For information and review purposes, the above expressions are equivalent to: |
|||
$regex_list_cleanup_pipetext = '/' . '§~§~([^~]*)~([^~]*)~[|]([^~]*)~' . '/i'; |
|||
// $regex_AMreg = '/\[\[(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[ _](\d{1,2})\]\], \[\[(\d{1,4}(?:[ _]BC)?)\]\]/i' |
|||
$replace_list_cleanup_pipetext = '$3'; // Discard link target, replace with pipe text only |
|||
// $regex_BRreg = '/\[\[(\d{1,2})[ _](Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\]\] \[\[(\d{1,4}(?:[ _]BC)?)\]\]/i' |
|||
// $regex_AModd = '/\[\[(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[ _](\d{1,2})\]\](?!, \[)(?: *(?:, *)?)\[\[(\d{1,4}(?:[ _]BC)?)\]\]/i' |
|||
// $regex_BRodd = '/\[\[(\d{1,2})[ _](Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\]\](?! \[)(?: *(?:, *)?)\[\[(\d{1,4}(?:[ _]BC)?)\]\]/i' |
|||
// $regex_YMD = '/\[\[(\d{1,4}(?:[ _]BC)?)\]\] *\[\[(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[ _](\d{1,2})\]\]/i' |
|||
// Remove any remaining placeholders. |
// Remove any remaining placeholders. |
||
Line 251: | Line 263: | ||
if ($match_count > 0) { |
if ($match_count > 0) { |
||
$editsummary .= "BRodd(×" . $match_count . "), "; |
$editsummary .= "BRodd(×" . $match_count . "), "; |
||
} |
|||
$match_count = 0; |
|||
$contents = preg_replace($regex_AMord, $replace_AMord, $contents, -1, &$match_count); |
|||
if ($match_count > 0) { |
|||
$editsummary .= "AMord(×" . $match_count . "), "; |
|||
} |
|||
$match_count = 0; |
|||
$contents = preg_replace($regex_BRord, $replace_BRord, $contents, -1, &$match_count); |
|||
if ($match_count > 0) { |
|||
$editsummary .= "BRord(×" . $match_count . "), "; |
|||
} |
|||
$match_count = 0; |
|||
$contents = preg_replace($regex_ordOf, $replace_ordOf, $contents, -1, &$match_count); |
|||
if ($match_count > 0) { |
|||
$editsummary .= "ordOf(×" . $match_count . "), "; |
|||
} |
|||
$match_count = 0; |
|||
$contents = preg_replace($regex_piped, $replace_piped, $contents, -1, &$match_count); |
|||
if ($match_count > 0) { |
|||
$editsummary .= "piped(×" . $match_count . "), "; |
|||
} |
} |
||
Line 270: | Line 306: | ||
$editsummary .= "ISO2(×" . $match_count . "), "; |
$editsummary .= "ISO2(×" . $match_count . "), "; |
||
} |
} |
||
//========== Begin search and replace date lists |
//========== Begin search and replace date lists |
||
$ |
$AMlist_count = 0; |
||
$BRlist_count = 0; |
|||
$AMordList_count = 0; |
|||
// Search for two part date range or list where rightmost part did nor match one of the |
|||
$BRordList_count = 0; |
|||
// dmy or mdy patterns above. Will typically match "[[mmm dd]] - [[mmm dd|dd]], [[yyyy]]" |
|||
$ordOfList_count = 0; |
|||
// forms or variations. |
|||
$ |
$pipedList_count = 0; |
||
$contents = preg_replace($regex_list_base, $replace_list_base, $contents, -1, &$match_count); |
|||
if ($match_count > 0) { |
|||
$editsummary .= "Lists1(×" . $match_count . "), "; |
|||
} |
|||
$date_list_count += $match_count; |
|||
// Process additional date parts to the left of a previously identified date or list. |
// Process additional date parts to the left of a previously identified date or list. |
||
// Loop for a maximum or 10 iterations or until no more matches are found |
|||
for ($i = 0; $i < 10; $i++) { |
for ($i = 0; $i < 10; $i++) { |
||
$current_iteration_match_count = 0; |
|||
$match_count = 0; |
$match_count = 0; |
||
$contents = preg_replace($ |
$contents = preg_replace($regex_AMlist, $replace_AMlist, $contents, -1, &$match_count); |
||
$AMlist_count += $match_count; |
|||
if ($match_count == 0) break; |
|||
$ |
$current_iteration_match_count += $match_count; |
||
$match_count = 0; |
|||
$contents = preg_replace($regex_BRlist, $replace_BRlist, $contents, -1, &$match_count); |
|||
$BRlist_count += $match_count; |
|||
$current_iteration_match_count += $match_count; |
|||
$match_count = 0; |
|||
$contents = preg_replace($regex_AMordList, $replace_AMordList, $contents, -1, &$match_count); |
|||
$AMordList_count += $match_count; |
|||
$current_iteration_match_count += $match_count; |
|||
$match_count = 0; |
|||
$contents = preg_replace($regex_BRordList, $replace_BRordList, $contents, -1, &$match_count); |
|||
$BRordList_count += $match_count; |
|||
$current_iteration_match_count += $match_count; |
|||
$match_count = 0; |
|||
$contents = preg_replace($regex_ordOfList, $replace_ordOfList, $contents, -1, &$match_count); |
|||
$ordOfList_count += $match_count; |
|||
$current_iteration_match_count += $match_count; |
|||
$match_count = 0; |
|||
$contents = preg_replace($regex_pipedList, $replace_pipedList, $contents, -1, &$match_count); |
|||
$pipedList_count += $match_count; |
|||
$current_iteration_match_count += $match_count; |
|||
if ($current_iteration_match_count == 0) break; // No more list extensions found |
|||
} |
} |
||
// Add list extension counts to summary |
|||
if ($date_list_count > 0) { |
|||
if ($AMlist_count > 0) { |
|||
$editsummary .= "Lists2(×" . $date_list_count . "), "; |
|||
$editsummary .= "AMlist(×" . $AMlist_count . "), "; |
|||
} |
|||
if ($BRlist_count > 0) { |
|||
$editsummary .= "BRlist(×" . $BRlist_count . "), "; |
|||
} |
|||
if ($AMordList_count > 0) { |
|||
$editsummary .= "AMordList(×" . $AMordList_count . "), "; |
|||
} |
|||
if ($BRordList_count > 0) { |
|||
$editsummary .= "BRordList(×" . $BRordList_count . "), "; |
|||
} |
|||
if ($ordOfList_count > 0) { |
|||
$editsummary .= "ordOfList(×" . $ordOfList_count . "), "; |
|||
} |
|||
if ($pipedList_count > 0) { |
|||
$editsummary .= "pipedList(×" . $pipedList_count . "), "; |
|||
} |
} |
||
// Finalize date list item format and remove any remaining marker strings ("§~§") |
// Finalize date list item format and remove any remaining marker strings ("§~§") |
||
$contents = preg_replace($regex_list_cleanup_nopipe, $replace_list_cleanup_nopipe, $contents); |
|||
$contents = preg_replace($regex_list_cleanup_pipetext, $replace_list_cleanup_pipetext, $contents); |
|||
$contents = preg_replace($regex_cleanup_final, $replace_cleanup_final, $contents); |
$contents = preg_replace($regex_cleanup_final, $replace_cleanup_final, $contents); |
||
Line 305: | Line 385: | ||
$editsummary = substr($editsummary, 0, -2); // to get rid of superfluous comma and space |
$editsummary = substr($editsummary, 0, -2); // to get rid of superfluous comma and space |
||
// In extreme cases where the edit summary is too long, trim the fat but keep the beef |
|||
if (strlen($editsummary) > 200) $editsummary = preg_replace('/\(×1\)/', '', $editsummary); // Remove "(×1)" |
|||
if (strlen($editsummary) > 200) $editsummary = preg_replace('/\, /', ',', $editsummary); // Remove spaces after commas |
|||
if (strlen($editsummary) > 200) $editsummary = preg_replace('/\(×/', '(', $editsummary); // Remove "×" from all counts |
|||
if (strlen($editsummary) > 200) $editsummary = preg_replace('/\..*:/', ':', $editsummary); // Remove most of lead-in |
|||
if (strlen($editsummary) > 200) $editsummary = preg_replace('/\(\d*\)/', ',', $editsummary); // Remove all remaining counts |
|||
overridecheck(); // checks if the manual override has been triggered |
overridecheck(); // checks if the manual override has been triggered |
Revision as of 21:24, 31 October 2009
<?php
/** fulldateunlinker.php -- Removes link tags from dates
* Release Candidate 2
*
* (c) 2009 James Hare (Harej) and others - http://en.wikipedia.org/wiki/User:Harej
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Developers (add your self here if you worked on the code):
* [[User:Harej]] - Initial code
* [[User:Chris G]] - MediaWiki API and database interfacing
* [[User:Tcncv]] - Date-parsing regular expressions, unlinker()
**/
// ---------- Script bootstrap: error reporting, includes, wiki login, database connection ----------
// Show errors on the console and report everything except notices (E_ALL with the E_NOTICE bit cleared).
ini_set("display_errors", 1);
error_reporting(E_ALL ^ E_NOTICE);
include("./public_html/botclasses.php"); // Botclasses.php was written by User:Chris_G and is available under the GNU General Public License
// NOTE(review): $botuser and $botpass are not defined anywhere in this file; presumably fdublogin.php sets them -- confirm.
include("fdublogin.php");
// For the purposes of unambiguous documentation, the Month-Day-Year style of writing dates will be referred to as "American" and the Day-Month-Year style "British".
// I understand how not-right this is but I felt it was necessary to use two terms that could not be confused with each other.
// ("International" would be a good replacement for "British", but "i" could be confused for "1", plus "int" means "integer".)
// I'm sorry, Chris, but I had to ditch the toolserver DB interfacing in favor of API interfacing.
// The database parts were not working, and there was no way to tell how to fix it because no errors were being put out.
// So I did the easy thing and put the old method, which works, back in.
// Log in to the wiki; $objwiki is the shared API client that the functions below reach through "global".
echo "Logging in...";
$objwiki = new wikipedia();
$objwiki->login($botuser, $botpass);
echo " done.\n";
// Shared page-text buffer: checktoprocess() fills it and unlinker() reads it, both through "global $contents".
$contents = "";
/* Connect to the database */
// Read the MySQL user name and password from the account's .my.cnf file.
// NOTE(review): the result of parse_ini_file() is not checked; if the file cannot be read the two lookups below operate on false.
echo "Retrieving database login credentials...";
$toolserver_mycnf = parse_ini_file("/home/messedrocker/.my.cnf");
$toolserver_username = $toolserver_mycnf['user'];
$toolserver_password = $toolserver_mycnf['password'];
unset($toolserver_mycnf); // the parsed array is no longer needed once the two values are copied out
echo " done.\n";
// Open the connection and select the bot's database; a failed select_db ends the script with the MySQL error text.
echo "Logging into database...";
mysql_connect("sql",$toolserver_username,$toolserver_password);
@mysql_select_db('u_messedrocker_reqs') or die(mysql_error());
echo " done.\n";
/**
 * Runs a SQL statement, re-establishing the MySQL connection first if it has dropped.
 *
 * Use this instead of calling mysql_query() directly so that a timed-out
 * connection is transparently reopened. The script is terminated with the
 * MySQL error text if the database cannot be selected or the statement fails.
 *
 * @param string $query Complete SQL statement; any values must already be escaped by the caller.
 * @return resource|bool Whatever mysql_query() returned on success: a result
 *                       resource for statements that produce rows, true otherwise.
 */
function query($query) {
    global $toolserver_username;
    global $toolserver_password;

    // mysql_ping() reports false when the connection is gone; reconnect with the stored credentials.
    if (!mysql_ping()) {
        mysql_connect("sql", $toolserver_username, $toolserver_password);
        @mysql_select_db('u_messedrocker_reqs') or die(mysql_error());
    }

    // The result must be captured before testing it: "return mysql_query(...) or die(...)"
    // returns the boolean value of the "or" expression, so callers only ever received
    // true and could never fetch rows from a SELECT.
    $result = mysql_query($query);
    if ($result === false) {
        die(mysql_error());
    }

    return $result;
}
/**
 * Emergency stop switch for the bot.
 *
 * Fetches the wiki page "User:Full-date unlinking bot/manual override" and
 * terminates the script if the text "Joe Biden" appears anywhere in it.
 * The phrase was chosen as a "safety word" because it is unlikely to end up
 * on that page by accident.
 *
 * NOTE(review): the original comment spelled the subpage "Manual override"
 * (capital M) while the code requests "manual override"; confirm which title
 * actually exists on the wiki.
 *
 * @return void Returns normally only when the override has not been triggered.
 */
function overridecheck() {
    global $objwiki;

    $overrideText = $objwiki->getpage("User:Full-date unlinking bot/manual override");
    $triggered = strpos($overrideText, "Joe Biden") !== false;

    if (!$triggered) {
        return;
    }

    die("Manual override has been triggered. Shutting down.");
}
/**
 * Decides whether the bot should process the page titled $page.
 *
 * Checks are made in this order, and the first one that fails returns false:
 *   1. The title must not match any of the excluded-title patterns below
 *      (month/day articles, year/century/millennium articles, "List of ..." year lists).
 *   2. The page text is fetched into the global $contents, and the page must
 *      pass the nobots() check for this bot's user name.
 *   3. The title must not already be recorded in the `unlinked` database table.
 *
 * Side effect: the global $contents is overwritten with the fetched page text
 * whenever the title passes step 1.
 *
 * NOTE(review): the patterns are anchored only at the start of the title, so the
 * first matches any title that begins with a full month name and the second any
 * title that begins with a digit -- confirm this breadth is intended.
 * NOTE(review): the fetch loop repeats for as long as getpage() returns an empty
 * value, so a page that is genuinely empty would never let this function return.
 *
 * @param string $page Title of the page to examine.
 * @return bool true if the page should be processed, false if it must be skipped.
 */
function checktoprocess($page) {
    global $objwiki;
    global $contents;
    global $botuser;

    $excludedTitlePatterns = array(
        // Month or Month-Day titles
        "/^(January|February|March|April|May|June|July|August|September|October|November|December)(\s\d{1,2})?/",
        // Year, century and millennium articles, BC and AD
        "/^\d{1,4}(st|rd|th|nd|s)?\s?(century|millennium)?( BC)?/i",
        // List of (year or year range) Xs
        "/^List of \d{1,4}(st|rd|th|nd|s)?\s?(century|millennium)?( BC)?/i",
        // List of Xs in the (year or year range)
        "/^List of .* in (the )?\d{1,4}(st|rd|th|nd|s)?\s?(century|millennium)?( BC)?/i",
    );
    foreach ($excludedTitlePatterns as $pattern) {
        if (preg_match($pattern, $page)) {
            return false;
        }
    }

    // Keep requesting the page until a non-empty text comes back.
    do {
        $contents = $objwiki->getpage($page, null, true);
    } while ($contents == "");

    // Skip pages on which the nobots() check fails for this bot.
    if (!$objwiki->nobots($page, $botuser, $contents)) {
        return false;
    }

    // A stored row whose name equals the title means the page was handled on an earlier run.
    $lookup = mysql_query("select * from `unlinked` where `name`=\"" . mysql_real_escape_string($page) . "\"");
    $record = mysql_fetch_assoc($lookup);

    return !($record['name'] == $page);
}
function unlinker($link) {
global $objwiki;
global $contents;
$contents_archive = $contents; // this is to maintain an unchanged version for comparison purposes. if there is no change, the bot will not send the API request to edit the page
$editsummary = "Unlinking full-dates. [[User:Full-date unlinking bot|Details here]]. Codes: ";
//========== Define regular expression date building blocks
// Key: m=month, d=day, y=year, s = ordinal suffix, n = ordinal day (1st, 2nd, ...),
// x = day with optional ordinal suffix (1, 1st, 2, 2nd, etc.), o = "of"
//
// Root date components
$part_m = '(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|'
. 'July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)';
$part_d = '\d{1,2}';
$part_s = '(?:st|nd|rd|th)'; // Ordinal suffix
$part_n = $part_d . $part_s; // Day with ordinal suffix
$part_x = $part_d . $part_s . '?'; // Day with optional ordinal suffix
$part_y = '\d{1,4}(?:[ _]BC)?';
// Captured date components
$part_c_m = '(' . $part_m . ')';
$part_c_d = '(' . $part_d . ')';
$part_c_n = '(' . $part_n . ')';
$part_c_x = '(' . $part_x . ')';
$part_c_y = '(' . $part_y . ')';
// The following "Nth of Month" dates are not redirects to standard day articles, so are not to be unlinked
$excl_nom = '(?!1st[ _]of[ _]May|5th[ _]of[ _]May|4th[ _]of[ _]July|6th[ _]of[ _]October|8th[ _]of[ _]November)';
// Linked captured date components
$part_lc_md = '\[\[' . $part_c_m . '[ _]' . $part_c_d . '\]\]'; // [[month day]]
$part_lc_dm = '\[\[' . $part_c_d . '[ _]' . $part_c_m . '\]\]'; // [[day month]]
$part_lc_mn = '\[\[' . $part_c_m . '[ _]' . $part_c_n . '\]\]'; // [[month Nth]]
$part_lc_nm = '\[\[' . $part_c_n . '[ _]' . $part_c_m . '\]\]'; // [[Nth month]]
$part_lc_nom = '\[\[' . $excl_nom . $part_c_n . '[ _]of[ _]' . $part_c_m . '\]\]'; // [[Nth of month]], excluding select dates
$part_lc_y = '\[\[' . $part_c_y . '\]\]'; // [[year]]
// Generalized piped date components
$part_mx = $part_m . '[ _]' . $part_x; // Month day(th)
$part_xm = $part_x . '[ _]' . $part_m; // day(th) Month
$part_nom = $excl_nom . $part_n . '[ _]of[ _]' . $part_m; // Nth of Month, excluding select dates
$part_gen_link = '(?:' . $part_mx . '|' . $part_xm . '|' . $part_nom . ')'; // m-d(th), d(th)-m, or d(th)-of-m
$part_gen_pipe = '(?:' . $part_x . '|' . $part_mx . '|' . $part_xm . '|' . $part_nom . ')'; // d(th), m-d(th), d(th)-m, or d(th)-of-m
$part_lc_piped = '\[\[' . $part_gen_link . '\|' . '(' . $part_gen_pipe . ')' . '\]\]'; // Only the piped text is captured
// Punctuation
$part_AMreg_punct = ', ';
$part_BRreg_punct = ' ';
$part_AModd_punct = '(?!, \[)(?: *(?:, *)?)'; // spaces and optional comma, excluding comma + single space
$part_BRodd_punct = '(?! \[)(?: *(?:, *)?)'; // spaces and optional comma, excluding single space
$part_c_gen_punct = '( *(?:, *)?)'; // spaces and optional comma (any form) - captured
$part_YMD_punct = ' *'; // Recognize only spaces (zero or more) between year and month-day
// Define words and punctuation that may appear between items of a date range or list.
// Optional comma + whitespace + separator punctuation or word + more whitespace
$part_list_commaopt = ',?';
$part_list_spacing = '(?: | |<br */?>)*'; // Zero or more: Space, symbolic nb-space, line break
$part_list_word =
'(?:-|–|—|−|~' // hyphen, en dash, em dash, minus, tilda
. '|/|&|[+]|×|x|,|;' // slash, ampersand, plus, times, letter x, comma, semicolon
. '|to|and|or|until|till|til|through|thru|into'
. '|–|—|\{\{ndash\}\}'
. ')';
$part_c_list_separator = '(' . $part_list_commaopt . $part_list_spacing . '(?:' . $part_list_word . $part_list_spacing . ')?)';
// Final search expressions
$regex_AMreg = '/' . $part_lc_md . $part_AMreg_punct . $part_lc_y . '/i';
$regex_BRreg = '/' . $part_lc_dm . $part_BRreg_punct . $part_lc_y . '/i';
$regex_AModd = '/' . $part_lc_md . $part_AModd_punct . $part_lc_y . '/i';
$regex_BRodd = '/' . $part_lc_dm . $part_BRodd_punct . $part_lc_y . '/i';
$regex_AMord = '/' . $part_lc_mn . $part_c_gen_punct . $part_lc_y . '/i';
$regex_BRord = '/' . $part_lc_nm . $part_c_gen_punct . $part_lc_y . '/i';
$regex_ordOf = '/' . $part_lc_nom . $part_c_gen_punct . $part_lc_y . '/i';
$regex_piped = '/' . $part_lc_piped . $part_c_gen_punct . $part_lc_y . '/i';
$regex_YMD = '/' . $part_lc_y . $part_YMD_punct . $part_lc_md . '/i';
$regex_ISO1 = '/' . '\[\[(-?\d{4}-\d{2}-\d{2})\]\]' . '/i'; // [[yyyy-mm-dd]] or [[-yyyy-mm-dd]]
$regex_ISO2 = '/' . '\[\[(-?\d{4})\]\]-\[\[(\d{2}-\d{2})\]\]' . '/i'; // [[yyyy]]-[[mm-dd]] or [[-yyyy]]-[[mm-dd]]
// Note: Negative year forms are also recognized in the above patterns for ISO-8601-like dates, but
// will likely never be encountered. Technically, ISO 8601 dates are only valid for years 1583 through
// 9999 of the Gregorian calendar, but we will not enforce those rules here.
// Final replace expressions ("§~§" is a marker, deleted later, that supports date list processing)
$replace_AM = '§~§$1 $2, $3';
$replace_BR = '§~§$1 $2 $3';
$replace_AMord = '§~§$1 $2$3$4'; // "(month) (dayth)(punct)(year)"
$replace_BRord = '§~§$1 $2$3$4'; // "(dayth) (month)(punct)(year)"
$replace_ordOf = '§~§$1 of $2$3$4'; // "(dayth) of (month)(punct)(year)"
$replace_piped = '§~§$1$2$3'; // (piped-text)(punc)(year)
$replace_YMD = '$1 $2 $3';
$replace_ISO1 = '$1';
$replace_ISO2 = '$1-$2';
// Date list search ("@" is used as a regex delimiter, since "/" is used in the expressions)
$regex_AMlist = '@' . $part_lc_md . $part_c_list_separator . '(?=§~§)' . '@i';
$regex_BRlist = '@' . $part_lc_dm . $part_c_list_separator . '(?=§~§)' . '@i';
$regex_AMordList = '@' . $part_lc_mn . $part_c_list_separator . '(?=§~§)' . '@i';
$regex_BRordList = '@' . $part_lc_nm . $part_c_list_separator . '(?=§~§)' . '@i';
$regex_ordOfList = '@' . $part_lc_nom . $part_c_list_separator . '(?=§~§)' . '@i';
$regex_pipedList = '@' . $part_lc_piped . $part_c_list_separator . '(?=§~§)' . '@i';
// Date list replace
$replace_AMlist = '§~§$1 $2$3'; // "(month) (day)(punct)..."
$replace_BRlist = '§~§$1 $2$3'; // "(day) (month)(punct)..."
$replace_AMordList = '§~§$1 $2$3'; // "(month) (dayth)(punct)..."
$replace_BRordList = '§~§$1 $2$3'; // "(dayth) (month)(punct)..."
$replace_ordOfList = '§~§$1 of $2$3'; // "(dayth) of (month)(punct)..."
$replace_pipedList = '§~§$1$2'; // "(pipetext)(punct)..."
// For information and review purposes, the above expressions are equivalent to:
// $regex_AMreg = '/\[\[(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[ _](\d{1,2})\]\], \[\[(\d{1,4}(?:[ _]BC)?)\]\]/i'
// $regex_BRreg = '/\[\[(\d{1,2})[ _](Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\]\] \[\[(\d{1,4}(?:[ _]BC)?)\]\]/i'
// $regex_AModd = '/\[\[(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[ _](\d{1,2})\]\](?!, \[)(?: *(?:, *)?)\[\[(\d{1,4}(?:[ _]BC)?)\]\]/i'
// $regex_BRodd = '/\[\[(\d{1,2})[ _](Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\]\](?! \[)(?: *(?:, *)?)\[\[(\d{1,4}(?:[ _]BC)?)\]\]/i'
// $regex_YMD = '/\[\[(\d{1,4}(?:[ _]BC)?)\]\] *\[\[(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[ _](\d{1,2})\]\]/i'
// Remove any remaining placeholders.
$regex_cleanup_final = '/' . '§~§' . '/i';
$replace_cleanup_final = '';
//========== Begin search and replace ordinary dates
$match_count = 0;
$contents = preg_replace($regex_AMreg, $replace_AM, $contents, -1, &$match_count);
if ($match_count > 0) {
$editsummary .= "AMreg(×" . $match_count . "), ";
}
$match_count = 0;
$contents = preg_replace($regex_BRreg, $replace_BR, $contents, -1, &$match_count);
if ($match_count > 0) {
$editsummary .= "BRreg(×" . $match_count . "), ";
}
$match_count = 0;
$contents = preg_replace($regex_AModd, $replace_AM, $contents, -1, &$match_count);
if ($match_count > 0) {
$editsummary .= "AModd(×" . $match_count . "), ";
}
$match_count = 0;
$contents = preg_replace($regex_BRodd, $replace_BR, $contents, -1, &$match_count);
if ($match_count > 0) {
$editsummary .= "BRodd(×" . $match_count . "), ";
}
$match_count = 0;
$contents = preg_replace($regex_AMord, $replace_AMord, $contents, -1, &$match_count);
if ($match_count > 0) {
$editsummary .= "AMord(×" . $match_count . "), ";
}
$match_count = 0;
$contents = preg_replace($regex_BRord, $replace_BRord, $contents, -1, &$match_count);
if ($match_count > 0) {
$editsummary .= "BRord(×" . $match_count . "), ";
}
$match_count = 0;
$contents = preg_replace($regex_ordOf, $replace_ordOf, $contents, -1, &$match_count);
if ($match_count > 0) {
$editsummary .= "ordOf(×" . $match_count . "), ";
}
$match_count = 0;
$contents = preg_replace($regex_piped, $replace_piped, $contents, -1, &$match_count);
if ($match_count > 0) {
$editsummary .= "piped(×" . $match_count . "), ";
}
$match_count = 0;
$contents = preg_replace($regex_YMD, $replace_YMD, $contents, -1, &$match_count);
if ($match_count > 0) {
$editsummary .= "YMD(×" . $match_count . "), ";
}
$match_count = 0;
$contents = preg_replace($regex_ISO1, $replace_ISO1, $contents, -1, &$match_count);
if ($match_count > 0) {
$editsummary .= "ISO1(×" . $match_count . "), ";
}
$match_count = 0;
$contents = preg_replace($regex_ISO2, $replace_ISO2, $contents, -1, &$match_count);
if ($match_count > 0) {
$editsummary .= "ISO2(×" . $match_count . "), ";
}
//========== Begin search and replace date lists
$AMlist_count = 0;
$BRlist_count = 0;
$AMordList_count = 0;
$BRordList_count = 0;
$ordOfList_count = 0;
$pipedList_count = 0;
// Process additional date parts to the left of a previously identified date or list.
// Loop for a maximum of 10 iterations or until no more matches are found
for ($i = 0; $i < 10; $i++) {
$current_iteration_match_count = 0;
$match_count = 0;
$contents = preg_replace($regex_AMlist, $replace_AMlist, $contents, -1, &$match_count);
$AMlist_count += $match_count;
$current_iteration_match_count += $match_count;
$match_count = 0;
$contents = preg_replace($regex_BRlist, $replace_BRlist, $contents, -1, &$match_count);
$BRlist_count += $match_count;
$current_iteration_match_count += $match_count;
$match_count = 0;
$contents = preg_replace($regex_AMordList, $replace_AMordList, $contents, -1, &$match_count);
$AMordList_count += $match_count;
$current_iteration_match_count += $match_count;
$match_count = 0;
$contents = preg_replace($regex_BRordList, $replace_BRordList, $contents, -1, &$match_count);
$BRordList_count += $match_count;
$current_iteration_match_count += $match_count;
$match_count = 0;
$contents = preg_replace($regex_ordOfList, $replace_ordOfList, $contents, -1, &$match_count);
$ordOfList_count += $match_count;
$current_iteration_match_count += $match_count;
$match_count = 0;
$contents = preg_replace($regex_pipedList, $replace_pipedList, $contents, -1, &$match_count);
$pipedList_count += $match_count;
$current_iteration_match_count += $match_count;
if ($current_iteration_match_count == 0) break; // No more list extensions found
}
// Add list extension counts to summary
if ($AMlist_count > 0) {
$editsummary .= "AMlist(×" . $AMlist_count . "), ";
}
if ($BRlist_count > 0) {
$editsummary .= "BRlist(×" . $BRlist_count . "), ";
}
if ($AMordList_count > 0) {
$editsummary .= "AMordList(×" . $AMordList_count . "), ";
}
if ($BRordList_count > 0) {
$editsummary .= "BRordList(×" . $BRordList_count . "), ";
}
if ($ordOfList_count > 0) {
$editsummary .= "ordOfList(×" . $ordOfList_count . "), ";
}
if ($pipedList_count > 0) {
$editsummary .= "pipedList(×" . $pipedList_count . "), ";
}
// Finalize date list item format and remove any remaining marker strings ("§~§")
$contents = preg_replace($regex_cleanup_final, $replace_cleanup_final, $contents);
//========== Postprocessing
$editsummary = substr($editsummary, 0, -2); // to get rid of superfluous comma and space
// In extreme cases where the edit summary is too long, trim the fat but keep the beef
if (strlen($editsummary) > 200) $editsummary = preg_replace('/\(×1\)/', '', $editsummary); // Remove "(×1)"
if (strlen($editsummary) > 200) $editsummary = preg_replace('/\, /', ',', $editsummary); // Remove spaces after commas
if (strlen($editsummary) > 200) $editsummary = preg_replace('/\(×/', '(', $editsummary); // Remove "×" from all counts
if (strlen($editsummary) > 200) $editsummary = preg_replace('/\..*:/', ':', $editsummary); // Remove most of lead-in
if (strlen($editsummary) > 200) $editsummary = preg_replace('/\(\d*\)/', ',', $editsummary); // Remove all remaining counts
overridecheck(); // checks if the manual override has been triggered
$sqlquery = query("INSERT INTO `unlinked` (`name`) VALUES (\"".mysql_real_escape_string($link)."\")");
if (strlen($contents) == 0) {
echo "Contents blanked during processing of article \"$link\". Skipping save step.";
}
else if ($contents != $contents_archive) {
$return_code = $objwiki->edit($link,$contents,$editsummary,true,true,null,true); // posts the change. The two "true" parameters indicate that this is a bot edit and it is a minor edit
if ($return_code['error']['code']=='editconflict') {
echo 'Edit conflict detected....';
}
sleep(10);
}
}
//========== Main driver
// For every month article ("January") and every day-of-month article
// ("January 1" ... "January 31"), fetch the pages that link to it and run
// unlinker() on each linking page that checktoprocess() approves.
// The "&blnamespace=0" argument is handed to whatlinkshere() unchanged
// (presumably it limits results to the article namespace -- confirm against the wiki class).
$months = array("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December");
foreach ($months as $month_name) { // for each month
    // Pages linking directly to the month article itself
    $links = $objwiki->whatlinkshere($month_name, "&blnamespace=0");
    foreach ($links as $backlink) {
        echo "Checking " . $backlink . "\n";
        if (checktoprocess($backlink)) { // if the checktoprocess function returns true
            echo $backlink . " shall be processed.\n";
            unlinker($backlink);
        }
        else {
            echo $backlink . " shall NOT be processed.\n";
        }
    }
    // Same procedure for each day article of this month. Days 1-31 are tried for
    // every month, so titles such as "February 31" are queried as well.
    for ($d = 1; $d < 32; $d++) {
        $day_title = $month_name . " " . $d;
        // Fixed: the original echoed an undefined $month here, so the month name was missing from the log.
        echo "Checking backlinks to " . $day_title . "\n";
        $links = $objwiki->whatlinkshere($day_title, "&blnamespace=0");
        foreach ($links as $backlink) {
            echo "Checking " . $backlink . "\n";
            if (checktoprocess($backlink)) {
                echo $backlink . " shall be processed.\n";
                unlinker($backlink);
            }
            else {
                echo $backlink . " shall NOT be processed.\n";
            }
        }
    }
}
?>