User:Andrybak/Scripts/Archiver.js
Appearance
< User:Andrybak | Scripts
(Redirected from User:Andrybak/Archiver.js)Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page. |
This user script seems to have a documentation page at User:Andrybak/Scripts/Archiver. |
/*
* <nowiki>
* This script is a fork of https://en.wikipedia.org/w/index.php?title=User:Enterprisey/archiver.js&oldid=1113588553
* which was forked from https://en.wikipedia.org/w/index.php?title=User:%CE%A3/Testing_facility/Archiver.js&oldid=1003561411
*/
/*
* Documentation of CSS classes.
*
* .arky-span is the main custom class of the script.
* Inside a .arky-span is an archive link, which triggers selection.
* .arky-span tags also store data (not visible in the UI) associated with
* corresponding sections: the index of the section and heading level
* (i.e. ==2==, ===3===, etc)
* Tags with .arky-span class are also called "archive spans".
*
* .arky-selected-section is put onto the whole semantic heading
* of a section, selected by user for archiving.
* During selection the class is used for styling (a light-blue background).
* After clicking "archive ... selected threads" the class is used to
* find all the archive spans, which live inside the semantic heading.
*/
const USERSCRIPT_NAME = "Archiver";
/*
 * Shows a notification via mw.notify, titled with the name of the script.
 * The notification is created with autoHide switched off.
 *
 * messageText - the text to show
 * important - when truthy, the text is wrapped into a <span>,
 *             which starts with a 20x20 "important" icon
 */
function notifyUser(messageText, important) {
    const notificationOptions = {
        title: USERSCRIPT_NAME,
        autoHide: false
    };
    if (!important) {
        mw.notify(messageText, notificationOptions);
        return;
    }
    const icon = document.createElement('img');
    icon.width = 20;
    icon.height = 20;
    // [[File:Ambox important.svg]]
    icon.src = 'https://upload.wikimedia.org/wikipedia/en/thumb/b/b4/Ambox_important.svg/40px-Ambox_important.svg.png';
    const wrapper = document.createElement('span');
    wrapper.append(icon, ' ', messageText);
    mw.notify(wrapper, notificationOptions);
}
const LOG_PREFIX = `[${USERSCRIPT_NAME}]:`;
/*
 * Logs the given values with console.error, prefixed with LOG_PREFIX.
 */
function error(...toLog) {
    const prefixedArguments = [LOG_PREFIX].concat(toLog);
    console.error.apply(console, prefixedArguments);
}
/*
 * Logs the given values with console.warn, prefixed with LOG_PREFIX.
 */
function warn(...toLog) {
    const prefixedArguments = [LOG_PREFIX].concat(toLog);
    console.warn.apply(console, prefixedArguments);
}
/*
 * Logs the given values with console.info, prefixed with LOG_PREFIX.
 */
function info(...toLog) {
    const prefixedArguments = [LOG_PREFIX].concat(toLog);
    console.info.apply(console, prefixedArguments);
}
/*
 * Logs the given values with console.debug, prefixed with LOG_PREFIX.
 */
function debug(...toLog) {
    const prefixedArguments = [LOG_PREFIX].concat(toLog);
    console.debug.apply(console, prefixedArguments);
}
/*
 * Returns the wikitext of the "advertisement": a wikilink to the
 * page of this script, with the name of the script as the link text.
 */
function constructAd() {
    // TODO maybe also introduce versions + include version in the ad?
    const scriptPageTitle = 'w:en:User:Andrybak/Scripts/Archiver';
    return 'using [[' + scriptPageTitle + '|' + USERSCRIPT_NAME + ']]';
}
/*
 * Returns the given edit summary, followed by the
 * result of constructAd() in parentheses.
 */
function constructEditSummary(mainEditSummary) {
    const advertisement = constructAd();
    return ''.concat(mainEditSummary, ' (', advertisement, ')');
}
$.when( mw.loader.using(['mediawiki.util','mediawiki.api']), $.ready).done( function () {
/*
* Reference documentation about keys and values in mw.config:
* https://www.mediawiki.org/wiki/Manual:Interface/JavaScript#mw.config
*/
if (!mw.config.get('wgIsArticle')) { // This variable is badly named -- it is not related to a page being a main namespace "article".
info('Not a wiki page. Aborting.');
return;
}
if (mw.config.get('wgArticleId') === 0 || mw.config.get('wgRevisionId') === 0) {
info('Either the page does not exist yet or it is a diffonly=yes view. Aborting.');
return;
}
if (mw.config.get("wgNamespaceNumber") % 2 == 0 && mw.config.get("wgNamespaceNumber") != 4) {
// not a talk page and not project namespace
info('Not a discussion namespace. Aborting.');
return;
}
if (mw.config.get("wgNamespaceNumber") == -1) {
// is a special page
info('This is a "Special:" page. Aborting.');
return;
}
const parserOutput = document.querySelector('#mw-content-text .mw-parser-output');
if (!parserOutput || $(parserOutput).find(':header').length === 0) {
info('Nothing to archive here. Aborting.');
return;
}
if (mw.config.get('wgDiffNewId') != null || mw.config.get('wgDiffOldId') != null) {
info('Detected diff view. Aborting.');
return;
}
mw.util.addCSS(".arky-selected-section { background-color: color-mix(in srgb, var(--background-color-progressive-subtle, #D9E9FF) 90%, var(--background-color-progressive--hover, #D9E9FF)); }" +
".arky-selected-section .arky-span a { font-weight:bold }");
/*
 * State shared by the functions and the click handlers below.
 */
// Maps a section index to { start, end } offsets; populated by extractValidSections()
const sectionCodepointOffsets = {};
// Wikitext of the page; assigned in addArchiveLinks()
let wikiText = "";
// The timestamp when we originally got the page contents - we pass it to the "edit" API call for edit conflict detection
let revStamp;
const portletLink = mw.util.addPortletLink("p-cactions", "#", "ⵙCA", "ca-oecaAndrybak", "Enter/exit the archival process", null, null);
const archiveButton = $("<button>");
// Assigned on the first click on `portletLink`
let highestArchiveSubpagePromise = null;
/*
 * Click on the portlet link enters or exits the archival process:
 * the current selection is cleared, the [archive] links and
 * the big archive button are shown or hidden.
 */
$(portletLink).click(function(e) {
    // The link points to "#": don't let the browser jump to the top of the page.
    e.preventDefault();
    // Note: removeClass() takes a bare class name, i.e. without the leading dot of a CSS selector.
    $(".arky-selected-section").removeClass('arky-selected-section');
    // Nothing is selected at this point, so bring the button back to its initial state.
    // This also resets the label, if "Loading..." was already shown to the user via the button.
    archiveButton
        .prop('disabled', false)
        .text("archive all the selected threads");
    $(".arky-span").toggle();
    archiveButton.toggle();
    if (highestArchiveSubpagePromise == null) {
        /*
         * Start searching for the archive subpage with highest number immediately.
         * Then the click listener on `archiveButton` will wait for this `Promise`.
         */
        highestArchiveSubpagePromise = findHighestArchiveSubpage();
    }
});
/*
 * Set up the big archive button: label, id and styling.
 * The button is appended to the <body> and toggled right away
 * (presumably hiding it until the portlet link is clicked).
 */
archiveButton
    .html("archive all the selected threads")
    .attr("id", 'arky-archive-button')
    .css({
        'position': 'sticky',
        'bottom': 0,
        'width': '100%',
        'font-size': '200%'
    });
$(document.body).append(archiveButton);
archiveButton.toggle();
/*
 * Click on the big archive button: collect the selected sections,
 * wait for the search of the highest archive subpage to finish,
 * then hand everything over to doArchive().
 */
archiveButton.click(function() {
    const selectedSections = $(".arky-selected-section .arky-span").map(function() {
        return $(this).data("section");
    }).toArray();
    if (selectedSections.length === 0) {
        alert("No threads selected, aborting");
        return;
    }
    if (highestArchiveSubpagePromise == null) {
        // Normally the search is started by the click on the portlet link.
        // Start it here, if the button was somehow clicked before that.
        highestArchiveSubpagePromise = findHighestArchiveSubpage();
    }
    const labelBeforeLoading = archiveButton.text();
    const timeoutId = setTimeout(() => {
        /*
         * In case highestArchiveSubpagePromise is taking a long time,
         * show to the user that stuff is happening.
         */
        archiveButton.text("Loading...");
    }, 1000);
    // Must run for both outcomes of the promise. Otherwise "Loading..." could
    // appear (or stay) on the button after the search has already finished.
    const stopLoadingIndicator = () => {
        clearTimeout(timeoutId);
        archiveButton.text(labelBeforeLoading);
    };
    highestArchiveSubpagePromise.then(result => {
        stopLoadingIndicator();
        info("Successful highestArchiveSubpagePromise:", result);
        doArchive(selectedSections, result);
    }, rejection => {
        stopLoadingIndicator();
        info("Failed highestArchiveSubpagePromise:", rejection);
        const currentPageName = mw.config.get("wgPageName");
        // the user will have to fill in the number of the archive in the prompt
        doArchive(selectedSections, archiveSpacedSubpageName(currentPageName, "???"));
    });
}); // end of archiveButton click handler
addArchiveLinks();
/*
 * Returns the integer halfway between `lower` and `upper`, rounded down.
 */
function midPoint(lower, upper) {
    const halfOfDistance = (upper - lower) / 2;
    return Math.floor(lower + halfOfDistance);
}
/*
* Based on https://en.wikipedia.org/wiki/Module:Exponential_search
*/
/*
 * Searches for the highest number, for which `testFunc` gives a truthy result.
 * The candidate is doubled while no failing number is known.
 * After the first failure, the range between the highest passing
 * and the lowest failing number is bisected.
 *
 * testFunc - function of a number, which is awaited; may be async
 * i - the first number to test
 * lower - a number, for which `testFunc` is assumed to pass
 * upper - a number, for which `testFunc` is known to fail, or a falsy value if unknown
 *
 * Returns a Promise of the found number. The Promise is rejected
 * when the search cannot make progress, e.g. when `testFunc` fails for `lower`.
 */
async function exponentialSearch(testFunc, i, lower, upper) {
    let candidate = i;
    let lowerBound = lower;
    let upperBound = upper;
    for (;;) {
        let nextCandidate;
        if (await testFunc(candidate)) {
            if (candidate + 1 === upperBound) {
                // the candidate passes and its successor fails
                return candidate;
            }
            lowerBound = candidate;
            nextCandidate = upperBound ? midPoint(lowerBound, upperBound) : candidate * 2;
        } else {
            upperBound = candidate;
            nextCandidate = midPoint(lowerBound, upperBound);
        }
        if (nextCandidate === candidate) {
            // Without this check the same number would be tested again and again, forever.
            throw new Error(`exponentialSearch: cannot make progress at ${candidate} (lower=${lowerBound}, upper=${upperBound})`);
        }
        candidate = nextCandidate;
    }
}
/*
 * Returns the title of an archive subpage in the form "<page>/Archive <number>".
 */
function archiveSpacedSubpageName(pageName, archiveNumber) {
    return `${pageName}/Archive ${archiveNumber}`;
}
/*
 * Returns the title of an archive subpage in the form "<page>/Archive/<number>".
 */
function archiveSlashedSubpageName(pageName, archiveNumber) {
    return `${pageName}/Archive/${archiveNumber}`;
}
/*
* Based on https://en.wikipedia.org/wiki/Wikipedia_talk:User_scripts/Archive_7#nocreate-missing
*/
/*
 * Asks the API about the page with the given title.
 * Resolves to false when the first page in the response has
 * the "missing" key, and to true otherwise.
 */
async function pageExists(title) {
    const { query: { pages } } = await new mw.Api().get({
        "action": "query",
        "format": "json",
        "titles": title
    });
    const [firstPage] = Object.values(pages);
    return !("missing" in firstPage);
}
/*
* Find the subpage of this page, which will be used as destination/target of archiving.
* It is just "Archive 1" by default, but can be increased by exponentialSearch.
*/
async function findHighestArchiveSubpage() {
    info("findHighestArchiveSubpage: start");
    try {
        const currentPageName = mw.config.get("wgPageName");
        const currentYear = new Date().getUTCFullYear();
        /*
         * Check if "current year" subpage is a good candidate for
         * pages with https://en.wikipedia.org/wiki/Template:Archived_annually
         * TODO: maybe implement checking if {{Archived annually}} is transcluded.
         */
        const previousYearExists = await pageExists(archiveSpacedSubpageName(currentPageName, currentYear - 1));
        if (previousYearExists && !(await pageExists(archiveSpacedSubpageName(currentPageName, currentYear + 1)))) {
            return archiveSpacedSubpageName(currentPageName, currentYear);
        }
        // Figure out the naming scheme by looking for the first archive: "/Archive 1" or "/Archive/1".
        let subpageFunc = null;
        for (const namingScheme of [archiveSpacedSubpageName, archiveSlashedSubpageName]) {
            if (await pageExists(namingScheme(currentPageName, 1))) {
                subpageFunc = namingScheme;
                break;
            }
        }
        if (subpageFunc === null) {
            notifyUser("Cannot find the first archive subpage", true);
            info('Assuming zero archive subpages.');
            return archiveSpacedSubpageName(currentPageName, 1);
        }
        const checkArchiveSubpageExists = (archiveNumber) => pageExists(subpageFunc(currentPageName, archiveNumber));
        // see also https://en.wikipedia.org/wiki/Module:Highest_archive_number
        const highestNumber = await exponentialSearch(checkArchiveSubpageExists, 10, 1, null);
        return subpageFunc(currentPageName, highestNumber);
    } catch (e) {
        // tell the user, then pass the failure on to whoever waits for the result
        const msg = "Cannot find archive subpage with the highest number";
        error(msg, e);
        notifyUser(msg, true);
        throw e;
    }
}
/*
 * Moves the selected sections from the current page to an archive page.
 * The user is asked for the title of the archive page.
 * Two edits are made: the first one removes the sections from the current page,
 * the second one (only after a successful first one) appends them to the archive page.
 * The page is reloaded afterwards.
 *
 * selectedSections - array of section indexes, which are keys of `sectionCodepointOffsets`,
 *                    in the order of appearance on the page
 * highestArchiveSubpage - title of the archive page suggested to the user in the prompt
 */
function doArchive(selectedSections, highestArchiveSubpage) {
    // returns `s` without the substring starting at `start` and ending at `end`
    function cut(s, start, end) {
        return s.substring(0, start) + s.substring(end);
    }
    if (!wikiText || !revStamp) {
        // Without the wikitext, the "edit" call below would replace the whole page with an empty text.
        alert("The wikitext of the page is not loaded (yet). Please try again in a moment or reload the page. Aborting.");
        return;
    }
    const archivePageName = prompt(`Archiving ${selectedSections.length} threads: where should we move them to? The latest archive number seems to be:`, highestArchiveSubpage);
    if (!archivePageName || archivePageName == mw.config.get("wgPageName")) {
        return alert("No archive target selected, aborting");
    }
    // codepointToUtf16Idx maps codepoint idx (i.e. MediaWiki index into page text) to utf-16 idx (i.e. JavaScript index into wikiText)
    const codepointToUtf16Idx = {};
    // Initialize "important" (= either a section start or end) values to 0
    selectedSections.forEach(function(n) {
        codepointToUtf16Idx[sectionCodepointOffsets[n].start] = 0;
        codepointToUtf16Idx[sectionCodepointOffsets[n].end] = 0;
    });
    codepointToUtf16Idx[Infinity] = Infinity; // Because sometimes we'll have Infinity as an "end" value
    // fill in our mapping from codepoints (MediaWiki indices) to utf-16 (i.e. JavaScript).
    // yes, this loops through every character in the wikitext. very unfortunate.
    let codepointPos = 0;
    for (let utf16Pos = 0; utf16Pos < wikiText.length; utf16Pos++, codepointPos++) {
        if (Object.prototype.hasOwnProperty.call(codepointToUtf16Idx, codepointPos)) {
            codepointToUtf16Idx[codepointPos] = utf16Pos;
        }
        const codeUnit = wikiText.charCodeAt(utf16Pos);
        if (0xD800 <= codeUnit && codeUnit <= 0xDBFF) {
            // high surrogate! utf16Pos goes up by 2, but codepointPos goes up by only 1.
            utf16Pos++; // skip the low surrogate
        }
    }
    // returns the [start, end] pair of utf-16 indexes of the section `n`
    function utf16Range(n) {
        return [
            codepointToUtf16Idx[sectionCodepointOffsets[n].start],
            codepointToUtf16Idx[sectionCodepointOffsets[n].end]
        ];
    }
    const newTextForArchivePage = selectedSections.map(function(n) {
        const [start, end] = utf16Range(n);
        return wikiText.substring(start, end);
    }).join("");
    // go in reverse order so that we don't invalidate the offsets of earlier sections
    // (on a copy, to leave the array of the caller alone)
    let newWikiText = wikiText;
    [...selectedSections].reverse().forEach(function(n) {
        const [start, end] = utf16Range(n);
        newWikiText = cut(newWikiText, start, end);
    });
    info("archive this:" + newTextForArchivePage);
    info("revised page:" + newWikiText);
    const pluralizedThreads = selectedSections.length + ' thread' + ((selectedSections.length === 1) ? '' : 's');
    new mw.Api().postWithToken("csrf", {
        action: 'edit',
        title: mw.config.get("wgPageName"),
        text: newWikiText,
        summary: constructEditSummary(`Removing ${pluralizedThreads}, will be on [[${archivePageName}]]`),
        basetimestamp: revStamp,
        starttimestamp: revStamp
    })
    .done(function(res1) {
        alert("Successfully removed threads from talk page");
        info(res1);
        new mw.Api().postWithToken("csrf", {
            action: 'edit',
            title: archivePageName,
            appendtext: "\n" + newTextForArchivePage,
            summary: constructEditSummary(`Adding ${pluralizedThreads} from [[${mw.config.get("wgPageName")}]]`)
        })
        .done(() => alert("Successfully added threads to archive page"))
        .fail(() => alert("Failed to add threads to archive page. Manual inspection needed."))
        .always(function(res2) {
            info(res2);
            window.location.reload();
        });
    })
    .fail(function(res1) {
        alert("Failed to remove threads from talk page. Aborting archive process.");
        error(res1);
        window.location.reload();
    });
} // end of doArchive()
/*
* Filters the result of the API query.
* Plus, importantly, populates the global variable `sectionCodepointOffsets`.
*/
function extractValidSections(apiResultSections) {
    const validSections = {};
    // For sections transcluded from other pages, the index will look
    // like T-1 instead of just 1. Keep only the sections with a numeric index.
    // Note: the loose comparison of a string with a number is intentional here.
    Array.from(apiResultSections || [])
        .filter((section) => section.index == parseInt(section.index))
        .forEach((section) => {
            validSections[section.index] = section;
        });
    // record the offsets in the global variable
    Object.keys(validSections).forEach((key) => {
        const sectionIndex = parseInt(key);
        const followingIndex = sectionIndex + 1;
        const hasFollowingSection = Object.prototype.hasOwnProperty.call(validSections, followingIndex);
        // What MediaWiki calls "byteoffset" is actually a codepoint offset!! Drat!!
        sectionCodepointOffsets[sectionIndex] = {
            start: validSections[sectionIndex].byteoffset,
            // a section ends where the following one starts; the last one has no known end
            end: hasFollowingSection ? validSections[followingIndex].byteoffset : Infinity
        };
    });
    return validSections;
}
/*
* The convoluted way of "header" vs "headerContainer" is needed, because
* there are different HTML layouts for "headings" in different skins.
* In Vector 2022, layout of ==Second level== versus ===Third level===
* headings is different even for a _single_ skin.
*
* The HTML layout is either
* <div><h*></h*><edit section link /></div>
* or
* <h*><edit section link /></h*>
*
* "headerContainer" is always the outer of the tags, it always contains the <edit section link /> tags.
* "header" is always one of the <h*> tags.
* Meaning that in some cases "header" and "headerContainer" are the same HTML element.
*
* arky-span, aka archiveSpans are put inside the "<edit section link />".
*
* For details, see:
* - https://www.mediawiki.org/w/index.php?title=Heading_HTML_changes&oldid=6538029
* - https://en.wikipedia.org/wiki/Wikipedia:Village_pump_(technical)/Archive_213#Tech_News_%E2%80%93_User%3AEnterprisey%2Farchiver.js
*/
// Returns a plain HTMLElement
function findEditSectionForHeader(header) {
    const EDIT_SECTION_SELECTOR = '.mw-editsection';
    // in Vector, the bracketed [edit] section link is inside of the <h2> <h3> etc
    const insideOfHeader = header.querySelector(EDIT_SECTION_SELECTOR);
    if (insideOfHeader) {
        return insideOfHeader;
    }
    // in other skins, the bracketed [edit] section link is a sibling of <h2> <h3> etc,
    // both of them are inside of a tag with class "mw-heading"
    const parent = header.parentElement;
    if (!parent.classList.contains('mw-heading')) {
        return null;
    }
    return parent.querySelector(EDIT_SECTION_SELECTOR) || null;
}
// Returns a jQuery object
function findHeaderContainerForArchiveSpan(archiveSpan) {
    const wrappedSpan = $(archiveSpan);
    // the outer tag (.mw-heading) takes precedence over <h*> tags
    for (const ancestorSelector of ['.mw-heading', ':header']) {
        const matchingAncestors = wrappedSpan.parents(ancestorSelector);
        if (matchingAncestors.length > 0) {
            return matchingAncestors;
        }
    }
    // neither of the known HTML layouts of headings was found
    notifyUser("findHeaderContainerForArchiveSpan: Cannot parse section headings in this skin. Aborting.", true);
    error("findHeaderContainerForArchiveSpan: Tags for bug report:", archiveSpan, archiveSpan.parentElement);
    return null;
}
/*
* We need to get the top-level element of the whole header.
* In some cases it's a <div class="mw-heading ...">
* In other cases it's just a <h2>, <h3>, etc tag.
*
* Returns a plain HTML element.
*/
function getHeaderContainer(header) {
    const parent = header.parentElement;
    // the parent is the container only if it is a tag with class "mw-heading"
    const isWrappedInHeadingDiv = parent.classList.contains('mw-heading');
    return isWrappedInHeadingDiv ? parent : header;
}
/*
* Create the bracketed [archive] links next to the [edit] section links.
* These [archive] links are used by a user to select sections for archival.
*/
function addArchiveLinks() {
    // grab page sections and wikitext so we can add the "archive" links to appropriate sections
    new mw.Api().get({action: 'parse', page: mw.config.get("wgPageName")}).done(function(parseApiResult) {
        // The wikitext is downloaded in the background, while the [archive] links are being added.
        new mw.Api().get({action: 'query', pageids: mw.config.get("wgArticleId"), prop: ['revisions'], rvprop: ['content', 'timestamp']}).done(function(revisionsApiResult) {
            const rv = revisionsApiResult.query.pages[mw.config.get("wgArticleId")].revisions[0];
            wikiText = rv["*"];
            revStamp = rv['timestamp'];
        }).fail(function() {
            // Without the wikitext there is nothing to move to the archive.
            warn('addArchiveLinks: Cannot download wikitext of the current page.');
            notifyUser("Cannot download the wikitext of the current page. Please reload the page before archiving.", true);
        });
        const validSections = extractValidSections(parseApiResult.parse.sections);
        /*
         * The search for all section headings starts with
         * finding all <h*> tags, which aren't for the table of contents.
         * From the <h*> tags, we find the "[edit] section links" and
         * "header containers" (see big comment above).
         */
        const allHeaders = $("#mw-content-text .mw-parser-output").find(":header").filter(':not(#mw-toc-heading)');
        if (allHeaders.length == 0) {
            warn('Nothing to archive here. The script should have aborted earlier. Aborting.');
            return;
        }
        allHeaders.each(function(i, header) {
            let sectionNumber = undefined;
            // "H2" -> 2, "H3" -> 3, etc
            const headerLevel = Number(header.tagName.slice(1));
            const editSection = findEditSectionForHeader(header);
            if (!editSection) {
                // no [edit] link, e.g. an archived page, where [edit] links are hidden with magic word __NOEDITSECTION__
                return;
            }
            const editSectionLink = editSection.querySelector('a');
            if (editSectionLink) {
                // Note: href may not be set.
                // The number of the section is taken from the URL parameter "section" of the [edit] link.
                const sectionNumberMatch = editSectionLink.href && editSectionLink.href.match(/[?&]section=(\d+)/);
                if (sectionNumberMatch) {
                    sectionNumber = sectionNumberMatch[1];
                }
            }
            if (!Object.prototype.hasOwnProperty.call(validSections, sectionNumber)) {
                // it might be something like <h2>not a real section</h2>
                return;
            }
            const archiveLink = $('<a>')
                .text('archive')
                .click(function() {
                    const correspondingHeaderContainer = $(getHeaderContainer(header));
                    correspondingHeaderContainer.toggleClass('arky-selected-section');
                    // now, click all sub-sections of this section
                    // i.e. mark all needed header containers with our CSS class .arky-selected-section
                    const isThisSectionSelected = correspondingHeaderContainer.hasClass('arky-selected-section');
                    const thisArchiveSpan = archiveLink.parents('.arky-span');
                    const thisHeaderLevel = thisArchiveSpan.data('header-level');
                    // starting from the current section, loop through each section
                    const allArchiveSpans = $('.arky-span');
                    const currSectionIdx = allArchiveSpans.index(thisArchiveSpan);
                    for (let spanIdx = currSectionIdx + 1; spanIdx < allArchiveSpans.length; spanIdx++) {
                        if ($(allArchiveSpans[spanIdx]).data('header-level') <= thisHeaderLevel) {
                            // if this isn't a subsection, quit
                            break;
                        }
                        const closestHeaderContainer = findHeaderContainerForArchiveSpan(allArchiveSpans[spanIdx]);
                        if (!closestHeaderContainer) {
                            // the user has been notified by findHeaderContainerForArchiveSpan already
                            break;
                        }
                        if (closestHeaderContainer.hasClass('arky-selected-section') != isThisSectionSelected) {
                            // if this section needs toggling, toggle it
                            closestHeaderContainer.toggleClass('arky-selected-section');
                        }
                    }
                    // finally, update button
                    const selectedSectionCount = $('.arky-selected-section').length;
                    archiveButton
                        .prop('disabled', selectedSectionCount === 0)
                        .text('archive ' + selectedSectionCount + ' selected thread' +
                            ((selectedSectionCount === 1) ? '' : 's'));
                });
            // the archive span is created hidden; it also stores the data about its section
            const arkySpan = $("<span>", { "class": "arky-span" })
                .css({'display':'none'})
                .data({'header-level': headerLevel, 'section': sectionNumber})
                .append(
                    $('<span>', { 'class': 'mw-editsection-bracket' }).text('['),
                    archiveLink,
                    $('<span>', { 'class': 'mw-editsection-bracket' }).text(']')
                );
            $(editSection).append(" ", arkySpan);
        });
    })
    .fail(() => warn('addArchiveLinks: Cannot download current page. Aborting.'));
}
}); // </nowiki>