User:GeneralNotability/edit-filter-hit-analyzer.js

Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump.
This code will be executed when previewing this page.

Documentation for this user script can be added at User:GeneralNotability/edit-filter-hit-analyzer. This user script seems to have an accompanying .css page at User:GeneralNotability/edit-filter-hit-analyzer.css.

Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.

// <nowiki>
// @ts-check
// More information on how an edit filter was tripped

// Pull in this script's companion stylesheet page (edit-filter-hit-analyzer.css).
// NOTE(review): presumably this is what styles the efa-rule / efa-data classes used below - confirm.
importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' );
/**
 * @typedef EditFilterLine
 * @type {Object}
 * @property {string} text Text of the line
 * @property {string} normedText Text with some modifications applied for parsing
 * @property {string[]} variables Variables found in the line
 * @property {number} indentation how far to indent the line
 */

// Map of variable name -> value. Seeded from the wgAbuseFilterVariables config
// value in efa_main() and extended with `name := "..."` assignments found while
// parsing the filter in efa_parseRules().
const efa_knownVars = {};

// Matches the page name of a single abuse log entry (Special:AbuseLog/<id>)
const efa_PAGE_NAME_RE = /Special:AbuseLog\/\d+/;
// Matches a link to a filter page; group 1 is the page title, group 2 the filter ID
const efa_FILTER_PAGE_RE = /\/wiki\/(Special:AbuseFilter\/(\d+))/;
// Vars in this list shouldn't have their full content displayed because they're usually really big
const efa_HIDDEN_VARS = [ 'old_wikitext', 'new_wikitext', 'edit_diff', 'all_links', 'added_lines', 'removed_lines', 'new_html' ];

// Parser regexes
// `name := "literal"`; group 1 is the variable name, group 2 the literal's content
const efa_REGEX_ASSIGNMENT_RE = /(\w+)\s*:=\s*"(.*)"/;
// `<subject> rlike|irlike|regex <var or "literal">`; group 1 is the subject
// expression, group 2 the operator, group 3 the regex operand.
// The \b around the operator matters: group 1 is greedy, so without a word
// boundary it swallows the leading "i" of "irlike" and group 2 would always
// read "rlike".
const efa_RLIKE_RE = /\b(.*)\s*\b(i?rlike|regex)\b\s*(\w+|".*")/;

// Future reference: if we want to do ccnorm ourselves, we can pull the conversion list
// from https://phab.wmfusercontent.org/file/data/jcoued3dziiwwwdr53lp/PHID-FILE-lkxia6juxnhqt263dbrj/equivset.json

/**
 * Annotate an abuse log entry page with a breakdown of the filter that was hit.
 *
 * Seeds efa_knownVars from the wgAbuseFilterVariables config value, reads the
 * filter ID from the Special:AbuseFilter link on the page, fetches and parses
 * that filter's pattern, and inserts a "Filter rule analysis" list before the
 * "Action parameters" heading. Each rule shows the values of the variables it
 * uses, and regex comparisons get a regex101 link.
 *
 * @return {Promise<void>}
 */
async function efa_main() {
	// populate knownVars with built-in values; the config value may be missing,
	// in which case there is simply nothing to seed
	Object.entries(mw.config.get('wgAbuseFilterVariables') || {}).forEach(([ key, value ]) => {
		efa_knownVars[key] = value;
	});

	// Find the link which goes to Special:AbuseFilter, then pull the filter ID out of it.
	// Bail out quietly (instead of throwing) when the page has no such link.
	const filterHref = $('a', document).filter(function () {
		return efa_FILTER_PAGE_RE.test(this.getAttribute('href'));
	}).attr('href');
	const filterLinkMatch = filterHref ? filterHref.match(efa_FILTER_PAGE_RE) : null;
	if (!filterLinkMatch) {
		return;
	}
	const filterId = filterLinkMatch[2];

	const $actionParams = $('h3:contains("Action parameters")', document);
	$('<h3>').text('Filter rule analysis').insertBefore($actionParams);
	const $ruleAnchor = $('<ul>').attr('id', 'efa-anchor').insertBefore($actionParams);

	const filterPattern = await efa_getFilter(filterId);
	if (!filterPattern) {
		// Something went wrong (or we can't access the filter),
		// bail out
		return;
	}
	const filterRules = efa_parseRules(filterPattern);
	filterRules.forEach((rule) => {
		const $bullet = $('<li>').attr('style', 'margin-left:' + (10 * rule.indentation + 10) + 'px;');
		$('<span>').addClass('efa-rule').text(rule.text).appendTo($bullet);
		rule.variables.forEach((variable) => {
			const shownValue = efa_HIDDEN_VARS.includes(variable) ? '(not shown)' : efa_knownVars[variable];
			// Use .text(), not .append(): the values come from the logged edit and
			// .append() would parse a string containing markup as HTML.
			$('<span>').addClass('efa-data').text(variable + ': ' + shownValue).appendTo($bullet);
		});

		const rlikeMatch = rule.normedText.match(efa_RLIKE_RE);
		if (rlikeMatch) {
			// If this is a regex, try to expand it and generate a link
			const subjectExpr = rlikeMatch[1];
			const matchType = rlikeMatch[2];
			const reExpr = rlikeMatch[3];
			// A quoted operand is a literal regex: strip the quotes and don't
			// attempt variable substitution on it
			const reQuoteSearch = reExpr.match(/.*?"(.*)"/);
			let reText = subjectExpr;
			let re = reQuoteSearch ? reQuoteSearch[1] : reExpr;
			// Expand variables (or possibly function calls on a variable)
			// TODO: this is really simplistic (obviously) - strip function calls and get
			// an exact match
			// Names are looked up in the original expressions only, so a value that
			// was just substituted is never re-scanned for other variable names.
			for (const [ name, value ] of Object.entries(efa_knownVars)) {
				if (subjectExpr.includes(name)) {
					reText = String(value);
				}
				if (!reQuoteSearch && reExpr.includes(name)) {
					re = String(value);
				}
			}
			// abusefilter entries are PCRE and by default use the 'u' flag.
			// if irlike is being used, add the i flag as well.
			let flags = 'u';
			if (matchType === 'irlike') {
				flags += 'i';
			}
			const re101url = `https://regex101.com/?regex=${encodeURIComponent(re)}&testString=${encodeURIComponent(reText)}&flags=${flags}`;
			$bullet.append(' ').append($('<a>').attr('href', re101url).text('(view at regex101)'));
		}
		$bullet.appendTo($ruleAnchor);
	});
}

/**
 * Turn a filter's pattern into a list of rules
 *
 * The pattern is split on the statement separators &, ; and | (outside of
 * double-quoted strings); each separator becomes its own entry. Every entry
 * is annotated with its paren nesting depth and with the known variables it
 * mentions. As a side effect, `name := "..."` assignments found in the
 * pattern are recorded in efa_knownVars.
 *
 * @param {string} pattern Original text pattern
 *
 * @return {EditFilterLine[]} List of rules
 */
function efa_parseRules(pattern) {
	// Strip all newline characters and split by statement
	// The second part is taken from https://stackoverflow.com/questions/11502598/how-to-match-something-with-regex-that-is-not-between-two-special-characters
	// It matches all split characters (&, ;, |) as long as they are _not_ between quotes
	const filterLines = pattern.replace(/(\r|\n)/g, '').split(/([&;|](?=(?:[^"]*"[^"]*")*[^"]*$))/g);
	/** @type {EditFilterLine[]} */
	const annotatedFilterLines = [];
	filterLines.forEach((line) => {
		// Trim, then replace every long whitespace run with a single space.
		// Whitespace between quotes is left alone (same lookahead as above) so
		// that literal regexes are shown exactly as written in the filter.
		const cleanedUpLine = line.trim().replace(/\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/g, ' ');
		if (cleanedUpLine === '') {
			// Nothing between two separators (or after a trailing one) - skip
			// it rather than producing an empty rule
			return;
		}
		annotatedFilterLines.push({ text: cleanedUpLine, normedText: cleanedUpLine,
			variables: [], indentation: 0 });
	});

	// Indentation pass: figure out how deep each statement is nested in parens,
	// then create a "normed" version which strips the extra paren(s)
	// While we're in there, save variable assignments
	let indent = 0;
	annotatedFilterLines.forEach((line) => {
		const openParens = line.text.split(/\(/).length;
		const closeParens = line.text.split(/\)/).length;
		// Because of how we split the strings, a block of indented text will
		// always start with an extra open paren on the starting rule, and close
		// with an extra one on the ending rule (but we want both of those lines)
		// indented
		const deltaParens = openParens - closeParens;
		if (deltaParens > 0) {
			indent += deltaParens;
			line.indentation = indent;
			// Remove the extra (first) open paren from the normed text
			line.normedText = line.text.replace('(', '');
		} else if (deltaParens < 0) {
			line.indentation = indent;
			indent += deltaParens; // Remember, deltaparens is negative here, so add it
			// Remove the extra (last) close paren from the normed text
			line.normedText = line.text.replace(/\)(?=[^)]*$)/, '');
		} else {
			line.indentation = indent;
		}
		const varAssignment = line.normedText.match(efa_REGEX_ASSIGNMENT_RE);
		if (varAssignment) {
			efa_knownVars[varAssignment[1]] = varAssignment[2];
		}
	});

	// Annotate by going through and identifying variables used in the lines.
	// Build one whole-word matcher per known variable up front; names are
	// escaped so they are always matched literally.
	const varMatchers = Object.keys(efa_knownVars).map((varName) => ({
		varName,
		varRe: new RegExp('\\b' + varName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b')
	}));
	annotatedFilterLines.forEach((line) => {
		const assignmentMatch = line.text.match(efa_REGEX_ASSIGNMENT_RE);
		const assignedVar = assignmentMatch ? assignmentMatch[1] : null;
		varMatchers.forEach(({ varName, varRe }) => {
			// Don't list the variable on the line that assigns it
			if (varName !== assignedVar && varRe.test(line.text)) {
				line.variables.push(varName);
			}
		});
	});
	return annotatedFilterLines;
}

/**
 * Look up a filter's pattern through the API (list=abusefilters).
 *
 * @param {string} filterId ID of the filter to fetch
 *
 * @return {Promise<string>} The pattern of the first filter returned, or an
 * empty string when no filter came back or the request failed (the error is
 * logged to the console)
 */
async function efa_getFilter(filterId) {
	// Start and end at the same ID so that only this one filter is requested
	const params = {
		action: 'query',
		list: 'abusefilters',
		abfstartid: filterId,
		abfendid: filterId,
		abfprop: 'pattern'
	};
	try {
		const result = await new mw.Api().get(params);
		const filters = result.query.abusefilters;
		// No match? Then there is no pattern to hand back
		return filters.length < 1 ? '' : filters[0].pattern;
	} catch (error) {
		console.log(error);
		return '';
	}
}

// On document load, check if this page is an edit filter hit - if so,
// load the EF stuff
$(function () {
	if (!efa_PAGE_NAME_RE.test(mw.config.get('wgPageName'))) {
		return;
	}
	// efa_getFilter() constructs mw.Api, which is provided by the
	// 'mediawiki.api' module - make sure it is loaded before starting.
	// Also report failures instead of leaving the promise unhandled.
	mw.loader.using('mediawiki.api')
		.then(() => efa_main())
		.catch((error) => {
			console.error(error);
		});
});
// </nowiki>