Jump to content

User:Andrybak/Scripts/Archiver.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/*
 * <nowiki>
 * This script is a fork of https://en.wikipedia.org/w/index.php?title=User:Enterprisey/archiver.js&oldid=1113588553
 * which was forked from https://en.wikipedia.org/w/index.php?title=User:%CE%A3/Testing_facility/Archiver.js&oldid=1003561411
 */

/*
 * Documentation of CSS classes.
 *
 * .arky-span is the main custom class of the script.
 * Inside a .arky-span is an archive link, which triggers selection.
 * .arky-span tags also store data (not visible in the UI) associated with
 * corresponding sections: the index of the section and heading level
 * (i.e. ==2==, ===3===, etc)
 * Tags with .arky-span class are also called "archive spans".
 *
 * .arky-selected-section is put onto the whole semantic heading
 * of a section, selected by user for archiving.
 * During selection the class is used for styling (a light-blue background).
 * After clicking "archive ... selected threads" the class is used to
 * find all the archive spans, which live inside the semantic heading.
 */

/* Human-readable name of this userscript: used for notification titles, the console log prefix and edit summaries. */
const USERSCRIPT_NAME = 'Archiver';

/*
 * Shows a notification bubble (via mw.notify) that stays until dismissed.
 *
 * messageText - text of the notification.
 * important   - when truthy, the text is preceded by a 20x20 "important" icon.
 */
function notifyUser(messageText, important) {
	// Wraps the text into a <span> which starts with the warning icon.
	const decorateWithIcon = () => {
		const icon = document.createElement('img');
		Object.assign(icon, {
			width: 20,
			height: 20,
			// [[File:Ambox important.svg]]
			src: 'https://upload.wikimedia.org/wikipedia/en/thumb/b/b4/Ambox_important.svg/40px-Ambox_important.svg.png'
		});
		const wrapper = document.createElement('span');
		wrapper.append(icon, ' ', messageText);
		return wrapper;
	};

	const notificationOptions = {
		title: USERSCRIPT_NAME,
		autoHide: false
	};
	mw.notify(important ? decorateWithIcon() : messageText, notificationOptions);
}

// Prefix put in front of everything this script writes to the browser console.
const LOG_PREFIX = "[" + USERSCRIPT_NAME + "]:";

// Writes the given values to the console at "error" level, after the script's log prefix.
function error(...toLog) {
	const prefixedArguments = [LOG_PREFIX, ...toLog];
	console.error(...prefixedArguments);
}

// Writes the given values to the console at "warn" level, after the script's log prefix.
function warn(...toLog) {
	const prefixedArguments = [LOG_PREFIX, ...toLog];
	console.warn(...prefixedArguments);
}

// Writes the given values to the console at "info" level, after the script's log prefix.
function info(...toLog) {
	const prefixedArguments = [LOG_PREFIX, ...toLog];
	console.info(...prefixedArguments);
}

// Writes the given values to the console at "debug" level, after the script's log prefix.
function debug(...toLog) {
	const prefixedArguments = [LOG_PREFIX, ...toLog];
	console.debug(...prefixedArguments);
}

// Builds the wikitext snippet that credits this script; it is appended to edit summaries.
function constructAd() {
	// TODO maybe also introduce versions + include version in the ad?
	const scriptLink = "[[w:en:User:Andrybak/Scripts/Archiver|" + USERSCRIPT_NAME + "]]";
	return "using " + scriptLink;
}

// Returns `mainEditSummary` followed by the script's ad in parentheses.
function constructEditSummary(mainEditSummary) {
	const ad = constructAd();
	return mainEditSummary + " (" + ad + ")";
}

$.when( mw.loader.using(['mediawiki.util','mediawiki.api']), $.ready).done( function () {
	/*
	 * Reference documentation about keys and values in mw.config:
	 * https://www.mediawiki.org/wiki/Manual:Interface/JavaScript#mw.config
	 */
	if (!mw.config.get('wgIsArticle')) { // This variable is badly named -- it is not related to a page being a main namespace "article".
		info('Not a wiki page. Aborting.');
		return;
	}
	if (mw.config.get('wgArticleId') === 0 || mw.config.get('wgRevisionId') === 0) {
		info('Either the page does not exist yet or it is a diffonly=yes view. Aborting.');
		return;
	}
	if (mw.config.get("wgNamespaceNumber") % 2 == 0 && mw.config.get("wgNamespaceNumber") != 4) {
		// not a talk page and not project namespace
		info('Not a discussion namespace. Aborting.');
		return;
	}
	if (mw.config.get("wgNamespaceNumber") == -1) {
		// is a special page
		info('This is a "Special:" page. Aborting.');
		return;
	}
	const parserOutput = document.querySelector('#mw-content-text .mw-parser-output');
	if (!parserOutput || $(parserOutput).find(':header').length === 0) {
		info('Nothing to archive here. Aborting.');
		return;
	}
	if (mw.config.get('wgDiffNewId') != null || mw.config.get('wgDiffOldId') != null) {
		info('Detected diff view. Aborting.');
		return;
	}

	// Styling for archival mode: selected headings get a light-blue background and a bold [archive] link.
	const selectedSectionBackground = "color-mix(in srgb, var(--background-color-progressive-subtle, #D9E9FF) 90%, var(--background-color-progressive--hover, #D9E9FF))";
	mw.util.addCSS([
		".arky-selected-section { background-color: " + selectedSectionBackground + "; }",
		".arky-selected-section .arky-span a { font-weight:bold }"
	].join(""));

	// Maps a section index to the `{ start, end }` codepoint offsets of that section; populated by extractValidSections().
	const sectionCodepointOffsets = {};
	// Wikitext of the page, as downloaded by addArchiveLinks(). Stays empty until that download finishes.
	let wikiText = "";
	let revStamp; // The timestamp when we originally got the page contents - we pass it to the "edit" API call for edit conflict detection

	// Entry in the page actions menu, which switches the archival mode on and off.
	const portletLink = mw.util.addPortletLink("p-cactions", "#", "ⵙCA", "ca-oecaAndrybak", "Enter/exit the archival process", null, null);
	const archiveButton = $(document.createElement("button"));
	// Result of findHighestArchiveSubpage(). Created lazily on the first click on the portlet link.
	let highestArchiveSubpagePromise = null;
	$(portletLink).click(function(e) {
		// The link points to "#": without this the browser would scroll to the top of the page.
		e.preventDefault();
		/*
		 * Entering and leaving the archival mode both start from an empty selection.
		 * Note that removeClass() expects a bare class name, not a selector with a leading dot.
		 */
		$(".arky-selected-section").removeClass('arky-selected-section');
		/*
		 * Nothing is selected anymore, so the button must not show a stale
		 * count of selected threads or a leftover "Loading..." text.
		 */
		archiveButton
			.prop('disabled', false)
			.text("archive all the selected threads");
		$(".arky-span").toggle();
		archiveButton.toggle();
		if (highestArchiveSubpagePromise == null) {
			/*
			 * Start searching for the archive subpage with highest number immediately.
			 * Then the click listener on `archiveButton` will wait for this `Promise`.
			 */
			highestArchiveSubpagePromise = findHighestArchiveSubpage();
		}
	});

	/*
	 * The big button at the bottom of the page, which starts the archiving
	 * of the selected sections. It is hidden until the archival mode is entered.
	 */
	archiveButton.text("archive all the selected threads")
		.attr("id", 'arky-archive-button')
		.css("position", 'sticky')
		.css("bottom", 0)
		.css("width", '100%')
		.css("font-size", '200%');
	$(document.body).append(archiveButton);
	archiveButton.toggle();
	archiveButton.click(function() {
		// Section indexes are stored as data on the archive spans inside of the selected headings.
		const selectedSections = $(".arky-selected-section .arky-span").map(function() {
			return $(this).data("section");
		}).toArray();
		if (selectedSections.length === 0) {
			alert("No threads selected, aborting");
			return;
		}
		const timeoutId = setTimeout(() => {
			/*
			 * In case highestArchiveSubpagePromise is taking a long time,
			 * show to the user that stuff is happening.
			 */
			archiveButton.text("Loading...");
		}, 1000);
		highestArchiveSubpagePromise.then(result => {
			clearTimeout(timeoutId);
			info("Successful highestArchiveSubpagePromise:", result);
			doArchive(selectedSections, result);
		}, rejection => {
			// The wait is over in this case too: don't let "Loading..." appear after the fact.
			clearTimeout(timeoutId);
			info("Failed highestArchiveSubpagePromise:", rejection);
			const currentPageName = mw.config.get("wgPageName");
			// The number is unknown, so the user has to fill it in when prompted.
			doArchive(selectedSections, archiveSpacedSubpageName(currentPageName, "???"));
		});
	}); // end of archiveButton click handler

	addArchiveLinks();

	// Returns the point halfway between `lower` and `upper`, rounded down.
	function midPoint(lower, upper) {
		const halfDistance = (upper - lower) / 2;
		return Math.floor(lower + halfDistance);
	}

	/*
	 * Based on https://en.wikipedia.org/wiki/Module:Exponential_search
	 */
	async function exponentialSearch(testFunc, i, lower, upper) {
		if (await testFunc(i)) {
			if (i + 1 == upper) {
				return i;
			}
			lower = i;
			if (upper) {
				i = midPoint(lower, upper);
			} else {
				i = i * 2;
			}
			return exponentialSearch(testFunc, i, lower, upper);
		} else {
			upper = i;
			i = midPoint(lower, upper);
			return exponentialSearch(testFunc, i, lower, upper);
		}
	}

	// Title of an archive subpage in the "Page/Archive N" naming scheme.
	function archiveSpacedSubpageName(pageName, archiveNumber) {
		const subpageSuffix = "/Archive " + archiveNumber;
		return pageName + subpageSuffix;
	}

	// Title of an archive subpage in the "Page/Archive/N" naming scheme.
	function archiveSlashedSubpageName(pageName, archiveNumber) {
		const subpageSuffix = "/Archive/" + archiveNumber;
		return pageName + subpageSuffix;
	}

	/*
	 * Based on https://en.wikipedia.org/wiki/Wikipedia_talk:User_scripts/Archive_7#nocreate-missing
	 *
	 * Asks the "query" API about `title` and resolves to `false` when the
	 * first page in the response is marked as "missing", `true` otherwise.
	 */
	async function pageExists(title) {
		const queryParameters = {
			"action": "query",
			"format": "json",
			"titles": title
		};
		const { query } = await new mw.Api().get(queryParameters);
		const [firstPage] = Object.values(query.pages);
		return !("missing" in firstPage);
	}

	/*
	 * Find the subpage of this page, which will be used as destination/target of archiving.
	 * It is just "Archive 1" by default, but can be increased by exponentialSearch.
	 *
	 * Returns a Promise of the title of the subpage:
	 *   - "<page>/Archive <current year>", if the subpage for the previous year
	 *     exists and the subpage for the next year doesn't;
	 *   - otherwise the existing subpage with the highest number, in the
	 *     "<page>/Archive N" or in the "<page>/Archive/N" naming scheme;
	 *   - "<page>/Archive 1", if no first archive subpage could be found
	 *     (the user is notified in this case).
	 * The Promise is rejected with the original error, if any of the lookups fails.
	 * The error is logged and shown to the user before that.
	 */
	async function findHighestArchiveSubpage() {
		info("findHighestArchiveSubpage: start");
		try {
			const currentPageName = mw.config.get("wgPageName");
			const currentYear = new Date().getUTCFullYear();
			/*
			 * Check if "current year" subpage is a good candidate for
			 * pages with https://en.wikipedia.org/wiki/Template:Archived_annually
			 * TODO: maybe implement checking if {{Archived annually}} is transcluded.
			 */
			if (await pageExists(archiveSpacedSubpageName(currentPageName, currentYear - 1))
				&& !await pageExists(archiveSpacedSubpageName(currentPageName, currentYear + 1))) {
				return archiveSpacedSubpageName(currentPageName, currentYear);
			}

			// Find out which naming scheme is used by the numbered archives of this page.
			let subpageFunc;
			if (await pageExists(archiveSpacedSubpageName(currentPageName, 1))) {
				subpageFunc = archiveSpacedSubpageName;
			} else if (await pageExists(archiveSlashedSubpageName(currentPageName, 1))) {
				subpageFunc = archiveSlashedSubpageName;
			} else {
				notifyUser("Cannot find the first archive subpage", true);
				info('Assuming zero archive subpages.');
				return archiveSpacedSubpageName(currentPageName, 1);
			}

			const checkArchiveSubpageExists = (archiveNumber) => {
				return pageExists(subpageFunc(currentPageName, archiveNumber));
			};
			// see also https://en.wikipedia.org/wiki/Module:Highest_archive_number
			const highestNumber = await exponentialSearch(checkArchiveSubpageExists, 10, 1, null);
			return subpageFunc(currentPageName, highestNumber);
		} catch (e) {
			const msg = "Cannot find archive subpage with the highest number";
			error(msg, e);
			notifyUser(msg, true);
			// rethrow, so that the returned Promise is rejected with the original error
			throw e;
		}
	}

	/*
	 * Moves the selected sections from the current page to an archive page.
	 *
	 * selectedSections      - indexes of the sections to move; every index must
	 *                         have an entry in `sectionCodepointOffsets`.
	 * highestArchiveSubpage - title which is suggested to the user as the target.
	 *
	 * The user is asked for the title of the archive page first. Then the
	 * sections are removed from the current page. Only if that edit succeeds,
	 * they are appended to the archive page. The page is reloaded after the edits.
	 */
	function doArchive(selectedSections, highestArchiveSubpage) {
		// returns `s` without the substring starting at `start` and ending at `end`
		function cut(s, start, end) {
			return s.substring(0, start) + s.substring(end);
		}

		if (!wikiText) {
			/*
			 * The wikitext hasn't been downloaded (yet, or the download has failed).
			 * Going on would save an empty page and archive nothing.
			 */
			alert("The wikitext of this page has not been loaded. Please reload the page and try again. Aborting.");
			return;
		}

		const currentPageName = mw.config.get("wgPageName");
		const archivePageName = prompt("Archiving " + selectedSections.length + " threads: where should we move them to? The latest archive number seems to be:", highestArchiveSubpage);
		if (!archivePageName || archivePageName == currentPageName) {
			alert("No archive target selected, aborting");
			return;
		}

		// codepointToUtf16Idx maps codepoint idx (i.e. MediaWiki index into page text) to utf-16 idx (i.e. JavaScript index into wikiText)
		const codepointToUtf16Idx = {};

		// Initialize "important" (= either a section start or end) values to 0
		selectedSections.forEach(function(n) {
			codepointToUtf16Idx[sectionCodepointOffsets[n].start] = 0;
			codepointToUtf16Idx[sectionCodepointOffsets[n].end] = 0;
		});
		codepointToUtf16Idx[Infinity] = Infinity; // Because sometimes we'll have Infinity as an "end" value

		// fill in our mapping from codepoints (MediaWiki indices) to utf-16 (i.e. JavaScript).
		// yes, this loops through every character in the wikitext. very unfortunate.
		let codepointPos = 0;
		for (let utf16Pos = 0; utf16Pos < wikiText.length; utf16Pos++, codepointPos++) {
			if (codepointToUtf16Idx.hasOwnProperty(codepointPos)) {
				codepointToUtf16Idx[codepointPos] = utf16Pos;
			}

			const codeUnit = wikiText.charCodeAt(utf16Pos);
			if (0xD800 <= codeUnit && codeUnit <= 0xDBFF) {
				// high surrogate! utf16Pos goes up by 2, but codepointPos goes up by only 1.
				utf16Pos++; // skip the low surrogate
			}
		}

		// Start and end of section `n` as indexes into `wikiText`.
		function utf16Range(n) {
			return {
				start: codepointToUtf16Idx[sectionCodepointOffsets[n].start],
				end: codepointToUtf16Idx[sectionCodepointOffsets[n].end]
			};
		}

		const newTextForArchivePage = selectedSections.map(function(n) {
			const range = utf16Range(n);
			return wikiText.substring(range.start, range.end);
		}).join("");

		// go in reverse order so that we don't invalidate the offsets of earlier sections
		// (on a copy, so that the caller's array is left alone)
		let newWikiText = wikiText;
		[...selectedSections].reverse().forEach(function(n) {
			const range = utf16Range(n);
			newWikiText = cut(newWikiText, range.start, range.end);
		});

		info("archive this:" + newTextForArchivePage);
		info("revised page:" + newWikiText);
		const pluralizedThreads = selectedSections.length + ' thread' + ((selectedSections.length === 1) ? '' : 's');
		new mw.Api().postWithToken("csrf", {
			action: 'edit',
			title: currentPageName,
			text: newWikiText,
			summary: constructEditSummary(`Removing ${pluralizedThreads}, will be on [[${archivePageName}]]`),
			basetimestamp: revStamp,
			starttimestamp: revStamp
		})
		.done(function(res1) {
			alert("Successfully removed threads from talk page");
			info(res1);
			new mw.Api().postWithToken("csrf", {
				action: 'edit',
				title: archivePageName,
				appendtext: "\n" + newTextForArchivePage,
				summary: constructEditSummary(`Adding ${pluralizedThreads} from [[${currentPageName}]]`)
			})
				.done(() => alert("Successfully added threads to archive page"))
				.fail(() => alert("Failed to add threads to archive page. Manual inspection needed."))
				.always(function(res2) {
					info(res2);
					window.location.reload();
				});
		})
		.fail(function(res1) {
			alert("Failed to remove threads from talk page. Aborting archive process.");
			error(res1);
			window.location.reload();
		});
	} // end of doArchive()

	/*
	 * Filters the result of the API query.
	 * Plus, importantly, populates the global variable `sectionCodepointOffsets`.
	 *
	 * apiResultSections - list of sections, as found in the response of the "parse" API.
	 *
	 * Returns an object, which maps the index of a section to the section
	 * itself. Only the sections with a plain numeric index are included.
	 */
	function extractValidSections(apiResultSections) {
		const validSections = {};

		// For sections transcluded from other pages, the index will look
		// like T-1 instead of just 1. Remove those.
		for (const section of apiResultSections || []) {
			if (/^\d+$/.test(String(section.index))) {
				validSections[section.index] = section;
			}
		}

		// record the offsets in the global variable
		for (const key of Object.keys(validSections)) {
			const index = Number.parseInt(key, 10);
			const hasNextSection = Object.prototype.hasOwnProperty.call(validSections, index + 1);
			// What MediaWiki calls "byteoffset" is actually a codepoint offset!! Drat!!
			sectionCodepointOffsets[index] = {
				start: validSections[key].byteoffset,
				// a section ends where the next one starts; the last one runs until the end of the page
				end: hasNextSection ? validSections[index + 1].byteoffset : Infinity
			};
		}

		return validSections;
	}

	/*
	 * The convoluted way of "header" vs "headerContainer" is needed, because
	 * there are different HTML layouts for "headings" in different skins.
	 * In Vector 2022, layout of ==Second level== versus ===Third level===
	 * headings is different even for a _single_ skin.
	 *
	 * The HTML layout is either
	 *    <div><h*></h*><edit section link /></div>
	 * or
	 *    <h*><edit section link /></h*>
	 *
	 * "headerContainer" is always the outer of the tags, it always contains the <edit section link /> tags.
	 * "header" is always one of the <h*> tags.
	 * Meaning that in some cases "header" and "headerContainer" are the same HTML element.
	 *
	 * arky-span, aka archiveSpans are put inside the "<edit section link />".
	 *
	 * For details, see:
	 *   - https://www.mediawiki.org/w/index.php?title=Heading_HTML_changes&oldid=6538029
	 *   - https://en.wikipedia.org/wiki/Wikipedia:Village_pump_(technical)/Archive_213#Tech_News_%E2%80%93_User%3AEnterprisey%2Farchiver.js
	 */

	/*
	 * Finds the element with the bracketed [edit] section link for a <h*> tag.
	 * Returns a plain HTMLElement, or null if there is none.
	 */
	function findEditSectionForHeader(header) {
		const editSectionSelector = '.mw-editsection';

		// in Vector, the bracketed [edit] section link is a direct child element/node
		const insideHeader = header.querySelector(editSectionSelector);
		if (insideHeader) {
			return insideHeader;
		}

		// in other skins, the bracketed [edit] section link is a sibling of <h2> <h3> etc
		const wrapper = header.parentElement;
		if (!wrapper.classList.contains('mw-heading')) {
			return null;
		}
		return wrapper.querySelector(editSectionSelector) || null;
	}

	/*
	 * Finds the header container for an archive span: its ancestors with
	 * class "mw-heading" or, if there are none, its <h*> ancestors.
	 *
	 * Returns a jQuery object. If nothing is found, the user is notified,
	 * the offending tags are logged, and null is returned.
	 */
	function findHeaderContainerForArchiveSpan(archiveSpan) {
		const $archiveSpan = $(archiveSpan);
		for (const ancestorSelector of ['.mw-heading', ':header']) {
			const $ancestors = $archiveSpan.parents(ancestorSelector);
			if ($ancestors.length > 0) {
				return $ancestors;
			}
		}
		notifyUser("findHeaderContainerForArchiveSpan: Cannot parse section headings in this skin. Aborting.", true);
		error("findHeaderContainerForArchiveSpan: Tags for bug report:", archiveSpan, archiveSpan.parentElement);
		return null;
	}

	/*
	 * Gives the top-level element of the whole header for a <h*> tag.
	 * That is the parent of the tag, if the parent has class "mw-heading"
	 * (i.e. <div class="mw-heading ...">). Otherwise, it's just the
	 * <h2>, <h3>, etc tag itself.
	 *
	 * Returns a plain HTML element.
	 */
	function getHeaderContainer(header) {
		const parent = header.parentElement;
		return parent.classList.contains('mw-heading') ? parent : header;
	}

	/*
	 * Create the bracketed [archive] links next to the [edit] section links.
	 * These [archive] links are used by a user to select sections for archival.
	 *
	 * Also downloads the wikitext of the page and its timestamp into
	 * `wikiText` and `revStamp`.
	 */
	function addArchiveLinks() {
		const api = new mw.Api();
		// grab page sections and wikitext so we can add the "archive" links to appropriate sections
		api.get({action: 'parse', page: mw.config.get("wgPageName")}).done(function(parseApiResult) {
			api.get({action: 'query', pageids: mw.config.get("wgArticleId"), prop: ['revisions'], rvprop: ['content', 'timestamp']}).done(function(revisionsApiResult) {
				const rv = revisionsApiResult.query.pages[mw.config.get("wgArticleId")].revisions[0];
				wikiText = rv["*"];
				revStamp = rv['timestamp'];
			}).fail(function(...failure) {
				// `wikiText` stays empty in this case: tell the user instead of failing silently.
				const msg = "Cannot download the wikitext of the current page. Please reload the page before archiving.";
				error("addArchiveLinks: " + msg, ...failure);
				notifyUser(msg, true);
			});

			const validSections = extractValidSections(parseApiResult.parse.sections);

			/*
			 * The search for all section headings starts with
			 * finding all <h*> tags, which aren't for the table of contents.
			 * From the <h*> tags, we find the "[edit] section links" and
			 * "header containers" (see big comment above).
			 */
			const allHeaders = $("#mw-content-text .mw-parser-output").find(":header").filter(':not(#mw-toc-heading)');
			if (allHeaders.length == 0) {
				warn('Nothing to archive here. The script should have aborted earlier. Aborting.');
				return;
			}
			allHeaders.each(function(i, header) {
				let sectionNumber;
				// "H2" -> 2, "H3" -> 3, etc
				const headerLevel = Number(header.tagName.slice(1));
				const editSection = findEditSectionForHeader(header);
				if (!editSection) {
					// we're either in an archived page ([edit] links are hidden with magic word __NOEDITSECTION__)
					return;
				}
				const editSectionLink = editSection.querySelector('a');
				if (editSectionLink) {
					// Note: href may not be set.
					const sectionNumberMatch = editSectionLink.href && editSectionLink.href.match(/&section=(\d+)/);
					if (sectionNumberMatch) {
						sectionNumber = sectionNumberMatch[1];
					}
				}
				// if the if statement fails, it might be something like <h2>not a real section</h2>
				if (!validSections.hasOwnProperty(sectionNumber)) {
					return;
				}

				const archiveLink = $('<a>')
					.text('archive')
					.click(function() {
						const correspondingHeaderContainer = $(getHeaderContainer(header));
						correspondingHeaderContainer.toggleClass('arky-selected-section');

						// now, click all sub-sections of this section
						// i.e. mark all needed header containers with our CSS class .arky-selected-section
						const isThisSectionSelected = correspondingHeaderContainer.hasClass('arky-selected-section');
						const thisArchiveSpan = archiveLink.parents('.arky-span');
						const thisHeaderLevel = thisArchiveSpan.data('header-level');

						// starting from the current section, loop through each section
						const allArchiveSpans = $('.arky-span');
						const currSectionIdx = allArchiveSpans.index(thisArchiveSpan);
						for (let spanIdx = currSectionIdx + 1; spanIdx < allArchiveSpans.length; spanIdx++) {
							if ($(allArchiveSpans[spanIdx]).data('header-level') <= thisHeaderLevel) {
								// if this isn't a subsection, quit
								break;
							}
							const closestHeaderContainer = findHeaderContainerForArchiveSpan(allArchiveSpans[spanIdx]);
							if (!closestHeaderContainer) {
								// The heading couldn't be found (the user has already been notified about it).
								// Stop here instead of crashing, so that the button below is still updated.
								break;
							}
							if (closestHeaderContainer.hasClass('arky-selected-section') != isThisSectionSelected) {
								// if this section needs toggling, toggle it
								closestHeaderContainer.toggleClass('arky-selected-section');
							}
						}

						// finally, update button
						const selectedSectionCount = $('.arky-selected-section').length;
						archiveButton
							.prop('disabled', selectedSectionCount === 0)
							.text('archive ' + selectedSectionCount + ' selected thread' +
								  ((selectedSectionCount === 1) ? '' : 's'));
					});

				// The span is hidden until the archival mode is entered.
				// It also carries the data about its section.
				const arkySpan = $("<span>", { "class": "arky-span" })
					.css({'display':'none'})
					.data({'header-level': headerLevel, 'section': sectionNumber})
					.append(
						$('<span>', { 'class': 'mw-editsection-bracket' }).text('['),
						archiveLink,
						$('<span>', { 'class': 'mw-editsection-bracket' }).text(']')
					);

				$(editSection).append("&nbsp;", arkySpan);
			});
		})
		.fail(() => warn('addArchiveLinks: Cannot download current page. Aborting.'));
	}
}); // </nowiki>