User:Novem Linguae/Scripts/SpeciesHelper.js
Appearance
(Redirected from User:Novem Linguae/Scripts/AddTaxobox.js)
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page. |
This user script seems to have a documentation page at User:Novem Linguae/Scripts/SpeciesHelper. |
// <nowiki>
// === Compiled with Novem Linguae's publish.php script ======================
$(async function() {
// === main.js ======================================================
/*
Got a bug report or feature request? Please let me know on my talk page.
- Adds a "Run SpeciesHelper" option to the left menu.
- Features
- When clicked, will try to add a speciesbox, category, taxonbar, and stub template to the article.
- Then will send you to a diff screen where you can adjust, if needed.
- If the article uses taxobox or automatic taxobox, will convert it to a speciesbox
- Will fill in the {{Taxonbar}}'s wikidata ID
- Will get the bottom 15 taxa in the tree of life, and will try each of those to figure out the category and stub type, until one is found that exists.
- ONLY WORKS ON SPECIES ARTICLES, not higher taxa
- script determines genus+species from title or from {{Speciesbox}}
- Hotkey support: alt+A to run the script, alt+shift+S to save the page
This page was assembled from 5 files using my publish.php script. I also have an offline test suite with around 175 unit tests.
*/
/* eslint-disable no-undef, no-alert */
// Wrapping this in a function so my linter doesn't throw a syntax error for "return not inside a function". Can remove this wrapper if I find a better way to do it. Or better yet, convert to a class.
$( async () => {
	const title = mw.config.get( 'wgPageName' ); // includes namespace, underscores instead of spaces
	if ( !shouldRunOnThisPage( title ) ) {
		return;
	}

	// Add the "Run SpeciesHelper" link to the navigation portlet and wire up the click + hotkey.
	mw.util.addPortletLink(
		'p-navigation',
		'#',
		'Run SpeciesHelper',
		'SpeciesHelper',
		// can't put comma here, silent error
		'[Alt+A]'
	);
	$( '#SpeciesHelper' ).on( 'click', async () => await speciesHelper( title ) );
	document.addEventListener( 'keydown', async ( event ) => {
		if ( event.altKey /* && event.shiftKey */ && event.key === 'a' ) {
			return await speciesHelper( title );
		}
	} );

	/**
	 * Main entry point. Fetches the wikicode of the current revision, applies each cleanup step
	 * in turn (speciesbox, taxonbar, categories, stubs, short description, whitespace...),
	 * collects an edit summary item for every step that changed something, and finally hands
	 * the result to goToShowChangesScreen(). Alerts and returns early when the page is skipped
	 * or when nothing needs changing.
	 *
	 * @param {string} title Page name including namespace, underscores instead of spaces.
	 */
	async function speciesHelper( title ) {
		const diffID = mw.config.get( 'wgRevisionId' );
		const wikicode = await getWikicodeOfDiff( diffID );
		let wikicode2 = wikicode;

		// NOTE(review): draftCategoryColon is assigned without a declaration, so it is an implicit
		// global. Left as-is because helpers outside this block may read it — confirm, then
		// declare it with `let`.
		draftCategoryColon = '';
		const isDraft = mw.config.get( 'wgNamespaceNumber' ) === 118;
		const hasDraftCategoriesTemplate = wikicode2.match( /\{\{Draft[ _]categories/gi );
		if ( isDraft && !hasDraftCategoriesTemplate ) {
			draftCategoryColon = ':';
		}

		// add a line break to the end. makes certain regex's simpler. trim it later.
		wikicode2 += '\n';

		// SKIP DISAMBIGUATION PAGES ===============================================
		if ( isDisambiguationPage( wikicode2 ) ) {
			alert( 'No changes needed. (Disambiguation pages skipped)' );
			return;
		}

		// SKIP REDIRECTS FOR NOW, MAY ADD LATER ===================================
		// TODO: support redirects. if it's good to add {{Taxonbar}} or {{Italic title}} to them, could do that. Could also check the target and see if it's a genus, and add {{R from species to genus}}. There's also a series of {{R from alternate scientific name}} templates (e.g. {{R from alternate scientific name|algae}}), could ask if that's what we want added, then add the correct one.
		if ( isRedirectPage( wikicode2 ) ) {
			alert( 'No changes needed. (Redirect pages currently skipped)' );
			return;
		}

		// SKIP SUBSPECIES FOR NOW =================================================
		if ( isSubSpecies( title, wikicode2 ) ) {
			alert( 'No changes needed. (Subspecies currently skipped)' );
			return;
		}

		// SKIP VIRUSES FOR NOW, THEIR SPECIES NAMES ARE LONG ======================
		const hasVirusBox = wikicode2.match( /\{\{(?:Virusbox)/i );
		if ( hasVirusBox ) {
			alert( 'No changes needed. (Viruses currently skipped)' );
			return;
		}

		const editSummaryItems = [];

		// CONVERT TAXOBOX TO SPECIESBOX ==========================================
		const wrongBox = wikicode2.match( /\{\{(?:Automatic[ _]taxobox|Taxobox)/i );
		if ( wrongBox ) {
			/*
			let pageCreationDate = await getPageCreationDate(title);
			let oneYearAgo = getDateOneYearAgo(new Date());
			let isOldArticle = pageCreationDate < oneYearAgo;
			if ( ! isOldArticle ) { // don't convert taxoboxes for old articles. too controversial.
			*/
			// Skipping gastropod (snail and slug) conversion for now. The latest I heard about this was at https://en.wikipedia.org/wiki/Wikipedia_talk:Automated_taxobox_system/Archive_5#Converting_taxoboxes_to_speciesboxes, where JoJan requested that I convert gastropod articles only if their Genus article was using {{Automatic taxobox}}, which was an indicator that he had checked it thoroughly. Sounds like it's OK to mass convert for all other projects, per Plantdrew's analysis in the above thread there are no longer any serious objectors.
			const isGastropod = wikicode2.match( /Gastropoda/i );
			if ( !isGastropod ) {
				const ctsb = new ConvertToSpeciesBox();
				wikicode2 = ctsb.convert( wikicode2 );
				editSummaryItems.push( 'convert to {{Speciesbox}}' );
			}
		}

		// DraftCleaner: remove {{Draft}} tag if not in draftspace ================
		let wikicodeBefore = wikicode2;
		wikicode2 = removeDraftTagIfNotDraftspace( wikicode2, isDraft );
		if ( wikicode2 !== wikicodeBefore ) {
			editSummaryItems.push( '-{{Draft}}' );
		}

		// REMOVE ITALICTITLE IF SPECIESBOX PRESENT ===============================
		wikicodeBefore = wikicode2;
		wikicode2 = removeItalicTitleIfSpeciesBoxPresent( wikicode2 );
		if ( wikicode2 !== wikicodeBefore ) {
			editSummaryItems.push( '-{{Italic title}}' );
		}

		// FIX {{Speciesbox |genus=A |species=A B}} ===============================
		wikicodeBefore = wikicode2;
		wikicode2 = fixSpeciesParameterThatContainsGenus( wikicode2 );
		if ( wikicode2 !== wikicodeBefore ) {
			editSummaryItems.push( 'fix species parameter' );
		}

		// REMOVE WHITESPACE IN CATEGORIES =========================================
		wikicodeBefore = wikicode2;
		wikicode2 = fixWhitespaceInCategories( wikicode2 );
		if ( wikicode2 !== wikicodeBefore ) {
			editSummaryItems.push( 'fix whitespace in categories' );
		}

		// TURN ON CATEGORIES IF NOT DRAFTSPACE ====================================
		wikicodeBefore = wikicode2;
		wikicode2 = enableCategories( wikicode2, isDraft );
		if ( wikicode2 !== wikicodeBefore ) {
			editSummaryItems.push( 'enable categories' );
		}

		// TURN OFF CATEGORIES ======================================================
		// NOTE(review): presumably this only acts when isDraft is true (the mirror image of
		// enableCategories above) — confirm against disableCategories().
		wikicodeBefore = wikicode2;
		wikicode2 = disableCategories( wikicode2, isDraft );
		if ( wikicode2 !== wikicodeBefore ) {
			editSummaryItems.push( 'disable categories' );
		}

		// GET TAXA ================================================================
		// taxa array is ascending. genus->subfamily->family->order->etc.

		/** Example: Mansonia (plant), rather than Mansonia */
		let taxonomyTemplateGenus = '';
		let species = '';
		let taxa = '';
		let genusForAlert = '';

		// try to get genus+species from {{Speciesbox |parent= |taxon= }}
		// Rare edge case used sometimes when disambiguating a genus
		// Example: {{Speciesbox | parent = Pilophorus (fungus) | taxon = Pilophorus acicularis}}
		const hasSpeciesboxTaxonAndParentParameters = getSpeciesboxTaxonAndParentParameters( wikicode2 );
		if ( !taxa && hasSpeciesboxTaxonAndParentParameters ) {
			taxonomyTemplateGenus = hasSpeciesboxTaxonAndParentParameters.taxonomyTemplateGenus;
			genusForAlert = hasSpeciesboxTaxonAndParentParameters.genusForAlert;
			species = hasSpeciesboxTaxonAndParentParameters.species;
			taxa = await getTaxa( taxonomyTemplateGenus );
		}

		// try to get genus+species from {{Speciesbox |genus= |species= }}
		if ( !taxa ) {
			taxonomyTemplateGenus = wikicode2.match( /\|\s*genus\s*=\s*([A-Za-z ()]+?)\s*[<\n|}]/ );
			species = wikicode2.match( /\|\s*species\s*=\s*([a-z]+)\s*[<\n|}]/ );
			if ( taxonomyTemplateGenus && species ) {
				taxonomyTemplateGenus = taxonomyTemplateGenus[ 1 ];
				genusForAlert = taxonomyTemplateGenus;
				species = species[ 1 ];
				taxa = await getTaxa( taxonomyTemplateGenus );
			}
		}

		let titleNoNamespace = getTitleNoNamespace( title );

		// try to get genus+species from the article title
		if ( !taxa ) {
			if ( isSandbox( title ) ) {
				// get bolded title. Guard against sandboxes without any bold text, which used
				// to throw a TypeError here and silently kill the script.
				const boldedTitle = wikicode.match( /'{3,}(.*?)'{3,}/ );
				if ( boldedTitle ) {
					// regex with /g: a string pattern would only replace the first space
					titleNoNamespace = boldedTitle[ 1 ].replace( / /g, '_' );
				}
			}
			const matches = titleNoNamespace.match( /^([^_]+)_([^_]+)/ );
			if ( matches ) {
				taxonomyTemplateGenus = matches[ 1 ];
				genusForAlert = genusForAlert || taxonomyTemplateGenus;
				species = matches[ 2 ];
				taxa = await getTaxa( taxonomyTemplateGenus );
			}
		}

		// try to get genus+species from {{Speciesbox |taxon= }}
		const hasTaxonParameter = wikicode2.match( /\|\s*taxon\s*=\s*([A-Z][a-z]+) ([a-z]+)\s*[<\n|}]/ );
		if ( !taxa && hasTaxonParameter ) {
			taxonomyTemplateGenus = hasTaxonParameter[ 1 ];
			genusForAlert = taxonomyTemplateGenus;
			species = hasTaxonParameter[ 2 ];
			taxa = await getTaxa( taxonomyTemplateGenus );
		}

		if ( !genusForAlert ) {
			alert( 'Unable to isolate genus and species. Is this actually a species?' );
			return;
		}

		if ( !taxa ) {
			const userWantsToCreateTaxonomy = confirm( `Template:Taxonomy/${ genusForAlert } does not exist. Is this actually a species? If so, need to create Template:Taxonomy/${ genusForAlert }. Press OK to create the template, or Cancel to go back to the article.` );
			if ( userWantsToCreateTaxonomy ) {
				window.location.href = `https://en.wikipedia.org/w/index.php?title=Template:Taxonomy/${ genusForAlert }&action=edit&preload=Template:Taxonomy/preload`;
			}
			return;
		}

		/** Example: Mansonia, rather than Mansonia (plant) */
		const displayGenus = taxonomyTemplateGenus.replace( / \([^)]+\)/, '' );

		taxa = taxaStringToArray( taxa );

		// Unusual edge case where the category and stub template exist, but their pages aren't for a taxa, they're for something not related to biology at all. The fix: just delete them from the taxa array.
		const taxaToSkip = [ 'Appalachia' ];
		taxa = taxa.filter( ( item ) => !taxaToSkip.includes( item ) );

		const mopf = new MOSOrderPositionFinder( wikicode );

		// SPECIESBOX ================================================================
		const hasSpeciesBoxOrTaxoBox = wikicode2.match( /\{\{(?:Speciesbox|Species[ _]box|Automatic[ _]taxobox|Taxobox|Subspeciesbox|Infraspeciesbox|Hybridbox|Virusbox)/i );
		if ( !hasSpeciesBoxOrTaxoBox ) {
			const toAdd =
`{{Speciesbox
| genus = ${ taxonomyTemplateGenus }
| species = ${ species }
}}`;
			wikicode2 = mopf.insertAtSection( wikicode2, toAdd, 'infoboxes' );
			editSummaryItems.push( '+{{Speciesbox}}' );
		}

		// TAXONBAR ================================================================
		// valid taxonbar templates: 'Taxonbar', 'Taxon-bar', 'Taxobar', 'TaxonIDs', 'Taxon identifiers', 'Taxon bar',
		const hasTaxonBar = wikicode2.match( /\{\{(?:Taxonbar|Taxon-bar|Taxobar|TaxonIDs|Taxon[ _]identifiers|Taxon[ _]bar)/i );
		if ( !hasTaxonBar ) {
			const wikidataID = await getWikidataID( `${ displayGenus } ${ species }` );
			if ( wikidataID ) {
				const toAdd = `{{Taxonbar|from=${ wikidataID }}}`;
				wikicode2 = mopf.insertAtSection( wikicode2, toAdd, 'taxonBar' );
				editSummaryItems.push( '+{{Taxonbar}}' );
			}
		}

		// CHECK IF A BUNCH OF STUBS AND CATEGORIES EXIST ==================
		const listOfNonLatinSpeciesCategories = {
			// SENTENCE CASE AND SPACES PLEASE, usually plural
			// latin (deprecated) -> non-latin (preferred)
			Acoelomorpha: 'Acoelomorphs',
			Amazona: 'Amazon parrots',
			Aleyrodidae: 'Whiteflies',
			Anguidae: 'Anguids',
			Anisoptera: 'Dragonflies',
			Annelida: 'Annelids',
			Anura: 'Frogs',
			Araneae: 'Spiders',
			Bombus: 'Bumblebees',
			Brachyura: 'Crabs',
			Bryozoa: 'Bryozoans',
			Caprini: 'Caprids',
			Cebinae: 'Capuchin monkeys',
			Cephalopoda: 'Cephalopods',
			Cervidae: 'Deer',
			Chilopoda: 'Centipedes',
			Cirripedia: 'Barnacles',
			Cnidaria: 'Cnidarians',
			Coleoptera: 'Beetles',
			Colubridae: 'Colubrids',
			Ctenophora: 'Ctenophores',
			Curculionoidea: 'Weevils',
			Dactyloidae: 'Anoles',
			Decapodiformes: 'Squid',
			Delphinidae: 'Oceanic dolphins',
			Dendrobatidae: 'Poison dart frogs',
			Dicruridae: 'Drongos',
			Didelphimorphia: 'Opossums',
			Ephemeroptera: 'Mayflies',
			Flavobacteriia: 'Flavobacteria',
			Formicidae: 'Ants',
			Gastropoda: 'Gastropods',
			Heterokonta: 'Heterokonts',
			Insecta: 'Insects',
			Isoptera: 'Termites',
			Licmetis: 'Corellas',
			Lithodidae: 'King crabs',
			Lorisoidea: 'Lorises and galagos',
			Macropodidae: 'Macropods',
			Macronaria: 'Macronarians',
			Mammalia: 'Mammals',
			Mammuthus: 'Mammoths',
			Marmota: 'Marmots',
			Mycobacterium: 'Mycobacteria',
			Myotis: 'Mouse-eared bats',
			Mysticeti: 'Baleen whale',
			Nematoda: 'Nematodes',
			Octopoda: 'Octopuses',
			Onychophora: 'Onychophorans',
			Paeonia: 'Peonies',
			Pitheciinae: 'Sakis and uakaris',
			Pseudacris: 'Chorus frogs',
			Rangifer: 'Reindeer',
			Rhinocerotidae: 'Rhinoceroses',
			Rosa: 'Roses',
			Sigmodon: 'Cotton rats',
			Sitta: 'Nuthatches',
			Syrphidae: 'Hoverflies',
			Thysanoptera: 'Thrips',
			Toxodontidae: 'Toxodonts',
			Toxotes: 'Archerfish'
		};
		const pagesToCheck = getPagesToCheck( taxa, listOfNonLatinSpeciesCategories );
		let listOfPages = await doPagesExist( pagesToCheck );
		listOfPages = fixArrayOrder( pagesToCheck, listOfPages );

		// DELETE [[Category:Genus| ]] =============================================
		// so we can regenerate it correctly
		const genusCategoryToCheck = listOfNonLatinSpeciesCategories[ taxonomyTemplateGenus ] || taxonomyTemplateGenus;
		wikicode2 = deleteGenusCategoryWithSpaceDisambiguator( wikicode2, genusCategoryToCheck, draftCategoryColon );

		// CATEGORY ================================================================
		const suggestedCategory = parseListOfPages( listOfPages, 'category' );
		const categoryGenusRegEx = new RegExp( `\\[\\[${ draftCategoryColon }Category:` + regExEscape( genusCategoryToCheck ), 'i' );
		const hasGenusParameterCategory = wikicode2.match( categoryGenusRegEx ); // so basically, don't run if genus category is already present. importantly, includes genuses with parentheses, e.g. [[Category:Saara (lizard)]]
		if ( suggestedCategory && !hasGenusParameterCategory ) {
			const wikicodeBeforeCategoryChanges = wikicode2;

			// build list of categories currently in the article
			const categoriesInArticle = getListOfCategoriesFromWikitext( wikicode2 );

			// check categories in the article as a batch, see if their taxonomy templates exist
			let categoriesWithTaxonomy = categoriesInArticle ? await doPagesExist( categoriesInArticle.map( ( v ) => {
				v = v.replace( 'Category:', '' );
				return 'Template:Taxonomy/' + v;
			} ) ) : [];
			categoriesWithTaxonomy = categoriesWithTaxonomy.map( ( v ) => {
				v = v.replace( 'Template:Taxonomy/', '' );
				return 'Category:' + v;
			} );

			const categoriesToDelete = getAllTaxaCategories( listOfPages );
			// if existing categories have taxonomy templates, add them to the list of categories to delete to avoid [[WP:OVERCAT]]
			for ( const cat of categoriesWithTaxonomy ) {
				categoriesToDelete.push( cat );
			}
			// delete any category names in our dictionary (non-latin category names)
			for ( const key in listOfNonLatinSpeciesCategories ) {
				const value = listOfNonLatinSpeciesCategories[ key ];
				categoriesToDelete.push( 'Category:' + value );
			}

			// delete all taxonomy related categories, to avoid [[WP:OVERCAT]]
			for ( const cat of categoriesToDelete ) {
				const regEx = new RegExp( '\\[\\[:?' + regExEscape( cat ) + '(?:\\|[^\\]]+)?\\]\\] {0,}\\n', 'gi' );
				wikicode2 = wikicode2.replace( regEx, '' );
			}

			// When the category is the genus category itself, sort the article under its species name.
			let toAdd = '';
			const suggestedCategoryNoParentheses = suggestedCategory.replace( / \([A-Za-z]+\)/, '' );
			if ( suggestedCategoryNoParentheses === taxonomyTemplateGenus ) {
				toAdd = `[[${ draftCategoryColon }Category:${ suggestedCategory }|${ species }]]`;
			} else {
				toAdd = `[[${ draftCategoryColon }Category:${ suggestedCategory }]]`;
			}
			wikicode2 = mopf.insertAtSection( wikicode2, toAdd, 'categories' );

			const categoriesInArticle2 = getListOfCategoriesFromWikitext( wikicode2 );
			const categoryListsAreIdentical = arraysHaveSameValuesCaseInsensitive( categoriesInArticle, categoriesInArticle2 );

			if ( wikicodeBeforeCategoryChanges !== wikicode2 && !categoryListsAreIdentical ) {
				editSummaryItems.push( 'category' );
			}

			if ( categoryListsAreIdentical ) {
				// undo our changes, since they are just minor changes involving whitespace or order of categories
				wikicode2 = wikicodeBeforeCategoryChanges;
			}
		}

		// IMPROVE CATEGORIES ======================================================
		const hasCategories = wikicode2.match( /\[\[:?Category:/gi );
		const categoryCount = hasCategories !== null ? hasCategories.length : 0;
		const hasImproveCategories = wikicode2.match( /\{\{Improve[ _]categories/i );
		if ( categoryCount < 2 && !hasImproveCategories && !isDraft ) {
			// Insertion point: very bottom. confirmed via twinkle test
			const now = new Date();
			// Force an English month name. The 'default' locale follows the browser language,
			// which would write e.g. "Januar 2024" into the template for non-English users.
			const month = now.toLocaleString( 'en-US', { month: 'long' } );
			const year = now.getFullYear();
			const date = month + ' ' + year;
			wikicode2 = mopf.insertAtSection( wikicode2, `{{Improve categories|date=${ date }}}`, 'improveCategories' );
			editSummaryItems.push( '+{{Improve categories}}' );
		}

		// STUB ==================================================================
		const suggestedStubName = parseListOfPages( listOfPages, 'stub' );
		const shouldBeStub = countWords( wikicode2 ) < 150; // I've been reverted for stub tagging an article with a word count of 175 before. so setting this kind of low.
		const hasStubTags = wikicode2.match( /\{\{.+-stub\}\}\n/gi );
		// these stubs are kind of fuzzy for various reasons. not worth messing with them
		const stubsThatTriggerSkip = [
			'Iguanidae',
			'Lizard',
			'Potato',
			// [[User talk:Novem Linguae/Scripts/SpeciesHelper#Dual Stub Removal]]
			'Plant-disease',
			// [[User talk:Novem Linguae/Scripts/SpeciesHelper#Dual Stub Removal]]
			'Paleo-gastropod'
		];
		let skip = false;
		for ( const stub of stubsThatTriggerSkip ) {
			const regex = new RegExp( '\\{\\{' + stub + '-stub\\}\\}', 'i' );
			const hasStub = wikicode2.match( regex );
			if ( hasStub ) {
				skip = true;
				break;
			}
		}
		if (
			!isDraft &&
			( shouldBeStub || hasStubTags ) &&
			suggestedStubName &&
			!skip
		) {
			let newStubs = [ '{{' + suggestedStubName + '}}' ];

			newStubs = addSafelistedStubs( newStubs, wikicode2 );

			// build oldStubs array (match() returns null when there are no stub tags)
			const oldStubs = wikicode2.match( /\{\{[^}]+-stub\}\}/gi ) || [];

			if ( !arraysHaveSameValuesCaseInsensitive( oldStubs, newStubs ) ) {
				// put proposed stub changes into "buffer" variable. if we decide to commit, commit it later
				let buffer = wikicode2;

				// delete all stubs, in preparation for writing ours
				// handle this edge case: {{-stub}}
				buffer = deleteAllStubs( buffer );

				// convert newStubs to toAdd string
				const toAdd = newStubs.join( '\n' );
				buffer = mopf.insertAtSection( buffer, toAdd, 'stubTemplates' );

				if ( !isMinorChange( wikicode2, buffer ) ) {
					editSummaryItems.push( 'stub' );

					// commit buffer
					wikicode2 = buffer;
				}
			}
		}

		// DELETE {{Stub}} IF ANY OTHER STUBS PRESENT
		wikicodeBefore = wikicode2;
		wikicode2 = deleteStubTemplateIfAnyOtherStubsPresent( wikicode2 );
		if ( wikicode2 !== wikicodeBefore && !editSummaryItems.includes( 'stub' ) ) {
			editSummaryItems.push( 'stub' );
		}

		// REPLACE <references /> WITH {{Reflist}} ==========================
		wikicodeBefore = wikicode2;
		wikicode2 = replaceReferencesWithReflist( wikicode2 );
		if ( wikicode2 !== wikicodeBefore ) {
			editSummaryItems.push( '<references /> to {{Reflist}}' );
		}

		// fix too many {{Uncategorized}} or {{Improve categories}} tags
		const allCategoriesRegEx = new RegExp( `(?<=\\[\\[${ draftCategoryColon })Category:.+(?=\\]\\])`, 'gi' );
		const categoryMatches = wikicode2.match( allCategoriesRegEx );
		if ( categoryMatches && categoryMatches.length > 0 ) {
			// delete {{Uncategorized}}
			const buffer = wikicode2;
			wikicode2 = wikicode2.replace( /\{\{Uncategorized[^}]*\}\}\n{0,2}/gi, '' );
			if ( buffer !== wikicode2 ) {
				editSummaryItems.push( '-{{Uncategorized}}' );
			}
		}
		if ( categoryMatches && categoryMatches.length > 2 ) {
			// delete improve categories
			const buffer = wikicode2;
			wikicode2 = wikicode2.replace( /\{\{Improve categories[^}]*\}\}\n{0,2}/gi, '' );
			if ( buffer !== wikicode2 ) {
				editSummaryItems.push( '-{{Improve categories}}' );
			}
		}

		// add {{Short description}} if missing and the script has a good guess
		const hasShortDescription = wikicode2.match( /\{\{(?:Short[ _]description|Shortdesc|Shortdescription|Short desc)/i );
		if ( !hasShortDescription ) {
			const suggestedShortDescription = suggestShortDescriptionFromWikicode( wikicode2, taxa );
			if ( suggestedShortDescription ) {
				wikicode2 = mopf.insertAtSection( wikicode2, suggestedShortDescription, 'shortDescription' );
				editSummaryItems.push( '+{{Short description}}' );
			}
		}

		// DraftCleaner: convert H1 to H2
		wikicodeBefore = wikicode2;
		wikicode2 = convertH1ToH2( wikicode2 );
		if ( wikicode2 !== wikicodeBefore ) {
			editSummaryItems.push( '<h1> to <h2>' );
		}

		/*
		// Commenting out. Some articles do need both, I think.
		// Example: https://en.wikipedia.org/w/index.php?title=Moringa_oleifera&oldid=1176702791
		// remove {{Authority control}} if {{Taxonbar}} present
		wikicodeBefore = wikicode2;
		wikicode2 = removeAuthorityControlIfTaxonbarPresent(wikicode2);
		if ( wikicode2 !== wikicodeBefore ) {
			editSummaryItems.push('-{{Authority control}}');
		}
		*/

		// remove empty {{Default sort}}
		// remove {{Default sort}} identical to article title
		wikicodeBefore = wikicode2;
		// regex with /g: a string pattern only replaces the FIRST underscore, which left titles
		// of three or more words (e.g. "Foo_bar_(plant)") partially converted.
		const titleNoNamespaceNoUnderscores = titleNoNamespace.replace( /_/g, ' ' );
		wikicode2 = removeEmptyDefaultSort( wikicode2 );
		wikicode2 = removeDefaultSortIdenticalToTitle( wikicode2, titleNoNamespaceNoUnderscores );
		if ( wikicode2 !== wikicodeBefore ) {
			editSummaryItems.push( '-{{Default sort}}' );
		}

		// if the only change was a change in capitalization or in # of enters, rollback the change, not worth such a minor edit
		if ( isMinorChange( wikicode, wikicode2 ) ) {
			wikicode2 = wikicode;
		}

		// DraftCleaner: fix extra whitespace/newlines, especially above ==References== section. Count it as a major change, force it no matter what.
		wikicodeBefore = wikicode2;
		wikicode2 = deleteMoreThanTwoEntersInARowBeforeReferences( wikicode2, mopf );
		if ( wikicode2 !== wikicodeBefore ) {
			editSummaryItems.push( 'fix whitespace' );
		}

		// DraftCleaner: fix extra whitespace/newlines everywhere. count it as a minor change, can be skipped if no other changes
		wikicode2 = deleteMoreThanTwoEntersInARow( wikicode2 );

		if ( wikicode.trim() === wikicode2.trim() || editSummaryItems.length === 0 ) {
			alert( 'No changes needed.' );
			return;
		}

		wikicode2 = wikicode2.trim();

		// Comma-separated list of everything we did, followed by a link back to the script.
		let editSummary = editSummaryItems.join( ', ' );
		editSummary += ' ([[User:Novem Linguae/Scripts/SpeciesHelper|SpeciesHelper]])';

		goToShowChangesScreen( title, wikicode2, editSummary );
	}
} );
/*
- New article list, for testing: https://en.wikipedia.org/wiki/User:AlexNewArtBot/PlantsSearchResult
- TODO:
- If in mainspace, and no "en" entry in wikidata, add it to wikidata
- If there's a category in the article that doesn't exist, remove it. [[WP:CATREDLINK]]
- Handle articles that are higher up in the tree of life than genus+species.
- Need to check genus+species with that API invoke query right away.
- If it isn't a genus, then assume it's higher up. Use {{Automatic taxobox}} instead of {{Speciesbox}}
- New pages feeds to test things out
- https://en.wikipedia.org/wiki/User:AlexNewArtBot#Biology_and_medicine
*/
// === modules/ConvertToSpeciesBox.js ======================================================
/**
 * Converts the first {{Taxobox}} / {{Automatic taxobox}} in a chunk of wikicode into a
 * {{Speciesbox}}: renames the template, cleans up the genus/species parameters, drops
 * parameters from the removal list below, and renames the authority parameters.
 */
class ConvertToSpeciesBox {
	/**
	 * @param {string} wikicode2 Wikicode containing a taxobox.
	 * @return {string} Wikicode with the taxobox converted to a speciesbox.
	 */
	convert( wikicode2 ) {
		// remove {{Italic title}}, 'DISPLAYTITLE', 'Lowercase title'
		wikicode2 = wikicode2.replace( /\{\{(?:Italic title|DISPLAYTITLE|Lowercase title)[^}]*\}\}\n?/gsi, '' );

		// Convert {{Taxobox or {{Automatic taxobox to {{Speciesbox
		// Also accept the underscore spelling and a "|" directly after the name. Otherwise the
		// rename was skipped while the parameter clean-up below still ran, leaving a
		// half-stripped {{Taxobox}}.
		wikicode2 = wikicode2.replace( /(?<=\{\{)(?:Taxobox|Automatic[ _]taxobox)(?=[\s|}])/i, 'Speciesbox' );

		// handle extinct species
		const hasExtinctTemplate = wikicode2.match( /(\|\s*species\s*=\s*)\{\{Extinct\}\}/i );
		const hasDagger = wikicode2.match( /(\|\s*species\s*=\s*)†/i );
		const hasExtinctParameter = wikicode2.match( /\|\s*extinct\s*=/i );
		if ( ( hasExtinctTemplate || hasDagger ) && !hasExtinctParameter ) {
			// add |extinct=yes
			wikicode2 = this._addToSpeciesBox( wikicode2, '| extinct = yes\n' );
		}
		// delete dagger
		wikicode2 = wikicode2.replace( /(\|\s*genus\s*=\s*)†/i, '$1' );
		wikicode2 = wikicode2.replace( /(\|\s*species\s*=\s*)†/i, '$1' );
		// delete {{Extinct}}
		wikicode2 = wikicode2.replace( /(\|\s*genus\s*=\s*)\{\{Extinct\}\}/i, '$1' );
		wikicode2 = wikicode2.replace( /(\|\s*species\s*=\s*)\{\{Extinct\}\}/i, '$1' );

		// genus: remove italics and wikilinks
		// | genus = ''[[Semicassis]]'' -> | genus = Semicassis
		// | genus = '''''Semicassis''''' -> | genus = Semicassis
		// The character classes exclude \n so the match stays on the genus line. Without that,
		// a plain "| genus = Foo" followed by "| species = bar" matched across the line break
		// (the next parameter was mistaken for the piped part of a wikilink) and the species
		// line was deleted.
		wikicode2 = wikicode2.replace( /(\|\s*genus\s*=\s*)'*\[*([^|}'\]\n]*)(?:\|[^|}'\]\n]*)?\]*'* *$/im, '$1$2' );

		// species: remove bold, italic, wikilinks, and beginning initial
		// | species = '''''S. faurotis''''' -> | species = faurotis
		// | species = '''''Semicassis faurotis''''' -> | species = faurotis
		// | species = [[moreauviae]] -> | species = moreauviae
		wikicode2 = wikicode2.replace( /(\|\s*species\s*=\s*)\[*'*(?:[A-Za-z.]+ )?([^'|}\]]*)'*\]* */is, '$1$2' );

		// if genus and binomial but no species, add species
		const hasGenus = wikicode2.match( /\|\s*genus\s*=\s*([A-Za-z ()]+?)\s*[\n|}]/ );
		let hasSpecies = wikicode2.match( /\|\s*species\s*=\s*([A-Za-z ()]+?)\s*[\n|}]/ );
		const hasBinomial = wikicode2.match( /\|\s*binomial\s*=\s*'{0,5}([A-Za-z()]+?) ([A-Za-z()]+?)'{0,5}\s*[\n|}]/ );
		if ( hasBinomial && hasGenus && !hasSpecies ) {
			const species = hasBinomial[ 2 ];
			wikicode2 = wikicode2.replace( /(\|\s*genus\s*=\s*([A-Za-z ()]+?))([\n|}])/, `$1\n| species = ${ species }$3` );
		}

		// TODO: copy/paste this list: https://en.wikipedia.org/wiki/Wikipedia:Automated_taxobox_system/checked_ranks
		const removeParametersList = [
			'binomial',
			'classis',
			'color',
			'divisio',
			'domain',
			'familia_authority',
			'familia',
			'genus_authority',
			'image_width', // throws an error when previewing, I think Speciesbox computes these automatically
			'image2_width',
			'infraclassis',
			'infraordo',
			'infraphylum',
			'ordo',
			'phylum',
			'regnum',
			'subclassis',
			'subfamilia',
			'subgenus',
			'subordo',
			'subphylum_authority',
			'subphylum',
			'subregnum',
			'superclassis_authority',
			'superclassis',
			'superdivisio',
			'superfamilia',
			'superordo',
			'tribus',
			'tribus_authority',
			'unranked_classis',
			'unranked_divisio',
			'unranked_ordo',
			'unranked_phylum',
			'unranked_regnum',
			'unranked_superfamilia',
			'unranked_superphylum'
		];
		for ( const parameter of removeParametersList ) {
			// Removes the first occurrence only: from "|parameter=" up to the newline that is
			// followed by the next "|" or the closing "}".
			const regex = new RegExp( '\\|\\s*' + parameter + '\\s*=.*?\\n(?=[|}])', 'is' );
			wikicode2 = wikicode2.replace( regex, '' );
		}

		// remove all blank parameters, but only in this taxobox
		// 1) tidies the code, 2) helps prevent duplicate |authority= parameters
		wikicode2 = this._removeBlankParametersFromFirstTemplate( 'speciesbox', wikicode2 );

		// rename binomial_authority, species_authority, and type_species_authority to authority
		wikicode2 = wikicode2.replace( /(\|\s*)binomial_(authority\s*=)/is, '$1$2' );
		wikicode2 = wikicode2.replace( /(\|\s*)type_species_(authority\s*=)/is, '$1$2' );
		wikicode2 = wikicode2.replace( /(\|\s*)species_(authority\s*=)/is, '$1$2' );

		// remove |name= if it contains the latin name. leave alone if non-latin name
		hasSpecies = wikicode2.match( /\|\s*species\s*=\s*([A-Za-z ()]+?)\s*[\n|}]/ );
		if ( hasGenus && hasSpecies ) {
			const genus = this._regExEscape( hasGenus[ 1 ] );
			const species = this._regExEscape( hasSpecies[ 1 ] );
			const regex = new RegExp( "\\|\\s*name\\s*=\\s*\\[*'*" + genus + ' ' + species + "'*\\]*\\s*(?=[\\n|}])", 'i' );
			wikicode2 = wikicode2.replace( regex, '' );
		}

		return wikicode2;
	}

	/**
	 * Escape a string so it can be embedded literally inside a RegExp.
	 *
	 * @param {string} string
	 * @return {string}
	 */
	_regExEscape( string ) {
		return string.replace( /[.*+?^${}()|[\]\\]/g, '\\$&' ); // $& means the whole matched string
	}

	/**
	 * Insert text as the first line(s) inside the first {{Speciesbox}}.
	 *
	 * @param {string} wikicode2 Haystack.
	 * @param {string} textToAdd Text to insert, e.g. "| extinct = yes\n".
	 * @return {string}
	 */
	_addToSpeciesBox( wikicode2, textToAdd ) {
		// A replacer function is used so textToAdd is inserted literally. When the template name
		// is not followed by a line break (e.g. "{{Speciesbox| genus = ..."), add one, so the
		// insertion still happens instead of being silently skipped.
		return wikicode2.replace( /\{\{Speciesbox(?:\s*\n)?/i, ( opening ) => {
			const separator = opening.endsWith( '\n' ) ? '' : '\n';
			return opening + separator + textToAdd;
		} );
	}

	/**
	 * @param templateName Needle. A template name, case insensitive. So for example, to find {{Speciesbox}}, you'd input speciesbox with no braces.
	 * @param wikicode Haystack. A bunch of wikicode.
	 */
	_removeBlankParametersFromFirstTemplate( templateName, wikicode ) {
		const thirds = this._isolateFirstTemplate( templateName, wikicode );
		// eliminate blank parameters without enters at the end of the line
		thirds[ 1 ] = thirds[ 1 ].replace( /\|[^=]*=[ \t]*(?=\|)/g, '' );
		// eliminate blank parameters with enters at the end of the line
		thirds[ 1 ] = thirds[ 1 ].replace( /\|[^=\]\n]*=[ \t]*\n(?=\}|\|)/g, '' );
		return thirds.join( '' );
	}

	/**
	 * Case-insensitive indexOf.
	 *
	 * @param {string} needle
	 * @param {string} haystack
	 * @return {number} Position of the first match, or -1.
	 */
	_indexOfCaseInsensitive( needle, haystack ) {
		return haystack.search( new RegExp( this._regExEscape( needle ), 'i' ) );
	}

	/**
	 * Split the haystack around the first template whose name starts with needle.
	 *
	 * @param {string} needle Template name without braces, case insensitive.
	 * @param {string} haystack Wikicode.
	 * @return {string[]} [preTemplateText, templateText, postTemplateText]
	 */
	_isolateFirstTemplate( needle, haystack ) {
		// isolate first template needle that is found
		const templateWikicodeStartPos = this._indexOfCaseInsensitive( '{{' + needle, haystack );

		// if not found
		if ( templateWikicodeStartPos === -1 ) {
			return [ haystack, '', '' ];
		}

		// Walk forward counting brace pairs until the opening {{ is balanced. A recognised pair
		// is consumed whole (i += 2). Advancing one character at a time counted overlapping
		// pairs, so "}}}}" was seen as three closers instead of two and the template was cut
		// off too early.
		let nestingCount = 0;
		let i = templateWikicodeStartPos;
		while ( i < haystack.length ) {
			const nextTwo = haystack.slice( i, i + 2 );
			if ( nextTwo === '{{' ) {
				nestingCount++;
				i += 2;
			} else if ( nextTwo === '}}' ) {
				nestingCount--;
				i += 2;
				if ( nestingCount === 0 ) {
					break;
				}
			} else {
				i++;
			}
		}
		// i now points just past the closing }} (or past the end of the string if the template is unclosed)
		const templateWikicodeEndPos = i;

		return [
			haystack.slice( 0, templateWikicodeStartPos ),
			haystack.slice( templateWikicodeStartPos, templateWikicodeEndPos ),
			haystack.slice( templateWikicodeEndPos )
		];
	}
}
// === modules/Inflect.js ======================================================
// Sho Kuwamoto, MIT License, https://web.archive.org/web/20190929193523/http://kuwamoto.org/2007/12/17/improved-pluralizing-in-php-actionscript-and-ror/
// The above link also has pluralize() if I ever need it. Deleted it for now as dead code.
/**
 * English plural -> singular conversion, driven by three lookup tables:
 * regex rules (tried in insertion order), irregular nouns, and words left untouched.
 */
class Inflect {
	constructor() {
		/* eslint-disable key-spacing, quote-props */
		// plural-matching regex source -> replacement. Order matters: the first rule that matches wins.
		this.singular = {
			'(quiz)zes$': '$1',
			'(matr)ices$': '$1ix',
			'(vert|ind)ices$': '$1ex',
			'^(ox)en$': '$1',
			'(alias)es$': '$1',
			'(octop|vir)i$': '$1us',
			'(cris|ax|test)es$': '$1is',
			'(shoe)s$': '$1',
			'(o)es$': '$1',
			'(bus)es$': '$1',
			'([m|l])ice$': '$1ouse',
			'(x|ch|ss|sh)es$': '$1',
			'(m)ovies$': '$1ovie',
			'(s)eries$': '$1eries',
			'([^aeiouy]|qu)ies$': '$1y',
			'([lr])ves$': '$1f',
			'(tive)s$': '$1',
			'(hive)s$': '$1',
			'(li|wi|kni)ves$': '$1fe',
			'(shea|loa|lea|thie)ves$': '$1f',
			'(^analy)ses$': '$1sis',
			'((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$': '$1$2sis',
			'([ti])a$': '$1um',
			'(n)ews$': '$1ews',
			'(h|bl)ouses$': '$1ouse',
			'(corpse)s$': '$1',
			'(us)es$': '$1',
			's$': ''
		};
		/* eslint-enable key-spacing */

		// singular -> irregular plural
		this.irregular = {
			move: 'moves',
			foot: 'feet',
			goose: 'geese',
			sex: 'sexes',
			child: 'children',
			man: 'men',
			tooth: 'teeth',
			person: 'people',
			fungus: 'fungi',
			bivalve: 'bivalves',
			genus: 'genera',
			mantis: 'mantises'
		};

		// words whose singular and plural are the same
		this.uncountable = [
			'sheep',
			'fish',
			'deer',
			'series',
			'species',
			'money',
			'rice',
			'information',
			'equipment'
		];

		// already-singular words that the regex rules would otherwise mangle
		this.skipWhenSingularizing = [
			'grass',
			'virus',
			'fungus',
			'genus',
			'mantis',
			'moss'
		];
	}

	/**
	 * Turn an English plural noun into its singular. Reliable for plural input; input that is
	 * already singular may be mangled unless it is listed in skipWhenSingularizing.
	 *
	 * @param {string} string The noun to convert.
	 * @return {string} The singular form, or the input unchanged when no rule applies.
	 */
	singularize( string ) {
		const lowered = string.toLowerCase();

		// Nothing to do for uncountable words and for the known-problematic singulars.
		const leaveAlone = this.uncountable.includes( lowered ) ||
			this.skipWhenSingularizing.includes( lowered );
		if ( leaveAlone ) {
			return string;
		}

		// Irregular plurals first: swap a trailing plural form for its singular.
		for ( const [ singularForm, pluralForm ] of Object.entries( this.irregular ) ) {
			const pluralAtEnd = new RegExp( pluralForm + '$', 'i' );
			if ( pluralAtEnd.test( string ) ) {
				return string.replace( pluralAtEnd, singularForm );
			}
		}

		// Then the regular rules; the first one that matches decides the result.
		for ( const [ source, replacement ] of Object.entries( this.singular ) ) {
			const rule = new RegExp( source, 'i' );
			if ( rule.test( string ) ) {
				return string.replace( rule, replacement );
			}
		}

		return string;
	}
}
// === modules/MOSOrderPositionFinder.js ======================================================
// Making this more complex than needed for this program. There are some other things I can use this for and I want to start developing it.
/**
 * Locates (and inserts at) the sections that make up a Wikipedia article, in the order given by this.sectionOrder.
 *
 * This class acts static. You pass in the wikicode variable to each method, and each method outputs the modified wikicode variable.
 * Every public method stores the wikicode on this.wikitext and recalculates all section positions from scratch.
 * A position of -1 means "section not present".
 */
class MOSOrderPositionFinder {
	/**
	 * Fill this.sectionOrder and this.sectionStartPositions for this.wikitext.
	 * Reads this.wikitext (must be set by the caller) and, when this.debug is truthy, logs a preview of every section.
	 */
	_calculate() {
		this.sectionOrder = [
			'top',
			'shortDescription',
			'displayTitle',
			'hatnotes',
			'featured',
			'deletionAndProtection',
			'maintenanceTags',
			'engvar',
			'infoboxes',
			'languageScriptNeeded',
			'sidebars',
			'lead',
			'tableOfContents',
			'body',
			'worksOrPublications',
			'seeAlso',
			'notesAndReferences',
			'furtherReading',
			'externalLinks',
			'successionAndGeographyBoxes',
			'navboxes',
			'portalBar',
			'taxonBar',
			'authorityControl',
			'geographicalCoordinates',
			'defaultSort',
			'categories',
			'improveCategories',
			'stubTemplates',
			'bottom'
		];

		this.sectionStartPositions = {
			top: 0,
			shortDescription: -1,
			displayTitle: -1,
			hatnotes: -1,
			featured: -1,
			deletionAndProtection: -1,
			maintenanceTags: -1,
			engvar: -1,
			infoboxes: -1,
			languageScriptNeeded: -1,
			sidebars: -1,
			lead: -1,
			tableOfContents: -1,
			body: -1,
			worksOrPublications: -1,
			seeAlso: -1,
			notesAndReferences: -1,
			furtherReading: -1,
			externalLinks: -1,
			successionAndGeographyBoxes: -1,
			navboxes: -1,
			portalBar: -1,
			taxonBar: -1,
			authorityControl: -1,
			geographicalCoordinates: -1,
			defaultSort: -1,
			categories: -1,
			stubTemplates: -1,
			improveCategories: -1,
			bottom: this.wikitext.length
		};

		// https://en.wikipedia.org/w/index.php?title=Special:WhatLinksHere/Template:Short_description&hidelinks=1&hidetrans=1
		this.sectionStartPositions.shortDescription = this._lookForTemplates( this.wikitext, [
			'Short description',
			'Shortdesc',
			'Shortdescription',
			'Short desc'
		] );

		this.sectionStartPositions.displayTitle = this._lookForTemplates( this.wikitext, [
			'DISPLAYTITLE',
			'Lowercase title',
			'Italic title'
		] );

		// https://en.wikipedia.org/wiki/Wikipedia:Hatnote
		this.sectionStartPositions.hatnotes = this._lookForTemplates( this.wikitext, [
			'About-distinguish',
			'About',
			'About other people',
			'Distinguish',
			'For',
			'For2',
			'Hatnote',
			'Other hurricanes',
			'Other people',
			'Other places',
			'Other ships',
			'Other uses of',
			'Other uses',
			'Other uses',
			'Redirect-distinguish',
			'Redirect-distinguish-text',
			'Redirect-distinguish2',
			'Redirect-multi',
			'Redirect',
			'Redirect2',
			'Self reference',
			'Similar names',
			'Technical reasons',
			'Malay name' // TODO: add more {{X name}} type templates.
		] );

		this.sectionStartPositions.featured = this._lookForTemplates( this.wikitext, [
			'Featured list',
			'Featured article',
			'Good article'
		] );

		// https://en.wikipedia.org/wiki/Wikipedia:Criteria_for_speedy_deletion
		// https://en.wikipedia.org/w/index.php?title=Special:WhatLinksHere/Template:Proposed_deletion&hidelinks=1&hidetrans=1
		this.sectionStartPositions.deletionAndProtection = this._lookForTemplates( this.wikitext, [
			'Db-a1', // CSD
			'Db-a10',
			'Db-a11',
			'Db-a2',
			'Db-a3',
			'Db-a5',
			'Db-a7',
			'Db-a9',
			'Db-afc',
			'Db-album',
			'Db-animal',
			'Db-attack',
			'Db-attackorg',
			'Db-author',
			'Db-badfairuse',
			'Db-badfiletype',
			'Db-band',
			'Db-banned',
			'db-blankdraft',
			'Db-blanked',
			'Db-c1',
			'db-catempty',
			'Db-club',
			'Db-contact',
			'Db-copypaste',
			'Db-copyvio',
			'Db-disambig',
			'Db-discog',
			'Db-empty',
			'Db-emptyportal',
			'Db-error',
			'Db-event',
			'Db-f1',
			'Db-f10',
			'Db-f2',
			'Db-f3',
			'Db-f5',
			'Db-f7',
			'Db-f8',
			'Db-f9',
			'Db-filecopyvio',
			'Db-foreign',
			'Db-fpcfail',
			'Db-g1',
			'Db-g10',
			'Db-g11',
			'Db-g12',
			'Db-g13',
			'Db-g14',
			'Db-g2',
			'Db-g3',
			'Db-g4',
			'Db-g5',
			'Db-g6',
			'Db-g7',
			'Db-g8',
			'Db-hoax',
			'Db-imagepage',
			'Db-inc',
			'Db-invented',
			'Db-madeup',
			'Db-move',
			'Db-moved',
			'Db-multiple',
			'Db-negublp',
			'Db-nocontent',
			'Db-nocontext',
			'Db-nofile',
			'Db-noncom',
			'Db-nonsense',
			'Db-notwebhost',
			'Db-nouser',
			'Db-p1',
			'Db-p2',
			'Db-person',
			'Db-personal attack',
			'Db-promo',
			'Db-r2',
			'Db-r3',
			'Db-r4',
			'Db-redircom',
			'Db-redirnone',
			'Db-redirtypo',
			'Db-rediruser',
			'Db-redundantfile',
			'Db-repost',
			'Db-same',
			'Db-self',
			'Db-song',
			'Db-spam',
			'Db-spamuser',
			'Db-subpage',
			'Db-talk',
			'Db-templatecat',
			'Db-test',
			'Db-transwiki',
			'Db-u1',
			'Db-u2',
			'Db-u5',
			'Db-userreq',
			'Db-vandalism',
			'Db-web',
			'Db-xfd',
			'Proposed deletion', // PROD
			'Prod',
			'Proposed delete',
			'Proposal to delete',
			'Propose deletion',
			'Draft-prod',
			'Article for deletion', // AFD
			'Pp' // page protection padlocks, includes {{Pp}} and {{Pp-*}}
		] );

		// Source: Twinkle
		this.sectionStartPositions.maintenanceTags = this._lookForTemplates( this.wikitext, [
			'Multiple issues', // not from Twinkle
			'Cleanup',
			'Cleanup rewrite',
			'Copy edit',
			'Close paraphrasing',
			'Copypaste',
			'External links',
			'Non-free',
			'Cleanup reorganize',
			'Lead missing',
			'Lead rewrite',
			'Lead too long',
			'Lead too short',
			'Sections',
			'Too many sections',
			'Very long',
			'All plot',
			'Fiction',
			'In-universe',
			'Long plot',
			'No plot',
			'Notability',
			'Advert',
			'Cleanup tense',
			'Essay-like',
			'Fanpov',
			'Like resume',
			'Manual',
			'Cleanup-PR',
			'Over-quotation',
			'Prose',
			'Technical',
			'Tone',
			'Confusing',
			'Incomprehensible',
			'Unfocused',
			'Context',
			'Expert needed',
			'Overly detailed',
			'Undue weight',
			'Current',
			'Update',
			'Autobiography',
			'COI',
			'Disputed',
			'Hoax',
			'Globalize',
			'Over-coverage',
			'Paid contributions',
			'Peacock',
			'POV',
			'Recentism',
			'Refimprove',
			'Too few opinions',
			'Undisclosed paid',
			'Weasel',
			'BLP sources',
			'BLP unsourced',
			'More citations needed',
			'One source',
			'Original research',
			'Primary',
			'Primary sources',
			'Self-published',
			'Sources exist',
			'Third-party',
			'Unreferenced',
			'Unreliable sources',
			'Not English',
			'Rough translation',
			'Expand language',
			'Dead end',
			'Orphan',
			'Overlinked',
			'Underlinked',
			'Citation style',
			'Cleanup bare URLs',
			'More footnotes',
			'No footnotes',
			'Uncategorized',
			'History merge',
			'Merge',
			'Merge from',
			'Merge to',
			'GOCEinuse',
			'In use',
			'Under construction'
		] );

		// https://en.wikipedia.org/wiki/Template:Use_X_English
		this.sectionStartPositions.engvar = this._lookForTemplates( this.wikitext, [
			'Engvar', // engvar
			'EngvarA',
			'EngvarB',
			'Use American English',
			'Use Australian English',
			'Use Bangladeshi English',
			'Use British English',
			'Use Oxford spelling',
			'Use Canadian English',
			'Use Ghanaian English',
			'Use Hiberno-English',
			'Use Hong Kong English',
			'Use Indian English',
			'Use Jamaican English',
			'Use Kenyan English',
			'Use Liberian English',
			'Use New Zealand English',
			'Use Nigerian English',
			'Use Pakistani English',
			'Use Philippine English',
			'Use Singapore English',
			'Use South African English',
			'Use Trinidad and Tobago English',
			'Use dmy dates', // dates
			'Use mdy dates'
		] );

		this.sectionStartPositions.infoboxes = this._lookForTemplates( this.wikitext, [
			'Infobox',
			'Speciesbox',
			'Species box',
			'Automatic taxobox',
			'Taxobox',
			'Subspeciesbox',
			'Infraspeciesbox',
			'Hybridbox',
			'Virusbox'
		] );

		// https://en.wikipedia.org/wiki/Category:Language_maintenance_templates
		this.sectionStartPositions.languageScriptNeeded = this._lookForTemplates( this.wikitext, [
			'Arabic script needed',
			'Armenian script needed',
			'Berber script needed',
			'Burmese script needed',
			'Cherokee script needed',
			'Chinese script needed',
			'Chinese script needed inline',
			'Contains special characters',
			'Devanagari script needed',
			'Egyptian hieroglyphic script needed',
			'EngvarB',
			'Ge\'ez script needed',
			'Georgian script needed',
			'Greek script needed',
			'Hebrew script needed',
			'IPA-ga notice',
			'Japanese script needed',
			'Khmer script needed',
			'Korean script needed',
			'Lao script needed',
			'Needchunom',
			'Needhanja',
			'Needhiragana',
			'Needkanji',
			'Needs IPA',
			'Nepali script needed',
			'Persian script needed',
			'Pronunciation needed',
			'Romanization needed',
			'Samoan script needed',
			'Syriac script needed',
			'Tamil script needed',
			'Thai script needed',
			'Tibetan script needed',
			'Tok Pisin script needed',
			'User language subcategory',
			'User programming subcategory',
			'Verify spelling',
			'Vietnamese script needed',
			'Yiddish script needed'
		] );

		// No reliable way to search for these. Some end in sidebar, many don't. Example of ones that don't: [[Rohingya genocide]] -> {{Rohingya conflict}}, {{Genocide}}.
		// TODO: Will need to return the position of any template between top and first sentence that isn't in one of the lists above.
		this.sectionStartPositions.sidebars = -1;

		// Return first text that isn't whitespace, a template, or inside a template. This is the article's first sentence.
		this.sectionStartPositions.lead = this._getFirstNonTemplateNonWhitespace( this.wikitext );

		// https://en.wikipedia.org/wiki/Help:Magic_words#Behavior_switches
		this.sectionStartPositions.tableOfContents = this._lookForStrings( this.wikitext, [
			'__TOC__'
		] );

		// first heading that starts a line (a heading on the very first line is not counted)
		this.sectionStartPositions.body = this._lookForRegEx( this.wikitext, /(?<=\n)==/i );

		this.sectionStartPositions.worksOrPublications = this._lookForHeadings( this.wikitext, [
			'Works\\s*==',
			'Publications',
			'Discography',
			'Filmography'
		] );

		this.sectionStartPositions.seeAlso = this._lookForHeadings( this.wikitext, [
			'See also'
		] );

		this.sectionStartPositions.notesAndReferences = this._lookForHeadings( this.wikitext, [
			'Bibliography',
			'Citations',
			'Endnotes',
			'Footnotes',
			'Notes',
			'References',
			'Sources',
			'Works cited'
		] );

		this.sectionStartPositions.furtherReading = this._lookForHeadings( this.wikitext, [
			'Further reading'
		] );

		this.sectionStartPositions.externalLinks = this._lookForHeadings( this.wikitext, [
			'External links'
		] );

		// https://en.wikipedia.org/wiki/Wikipedia:WikiProject_Succession_Box_Standardization/Templates
		// TODO: figure out what "geography boxes" are, add them
		this.sectionStartPositions.successionAndGeographyBoxes = this._lookForTemplates( this.wikitext, [
			'S-' // they all are of the format {{S-*}}
		] );

		// Hard to figure these out, unless they are contained in the {{Navbox}} wrapper
		// TODO: assume any templates in this area that aren't on other lists are navboxes
		// https://en.wikipedia.org/w/index.php?title=Special%3AWhatLinksHere&hidetrans=1&hidelinks=1&target=Template%3ANavbox&namespace=
		this.sectionStartPositions.navboxes = this._lookForTemplates( this.wikitext, [
			'Navbox',
			'Dynamic navigation box',
			'Navigation',
			'Hider hiding',
			'Horror navbox',
			'VG navigation',
			'CVG navigation',
			'TransNB',
			'Navtable'
		] );

		// https://en.wikipedia.org/w/index.php?title=Special:WhatLinksHere/Template:Portal_bar&hidetrans=1&hidelinks=1
		this.sectionStartPositions.portalBar = this._lookForTemplates( this.wikitext, [
			'Portal bar',
			'Prb'
		] );

		// https://en.wikipedia.org/w/index.php?title=Special%3AWhatLinksHere&hidetrans=1&hidelinks=1&target=Template%3ATaxonbar&namespace=
		this.sectionStartPositions.taxonBar = this._lookForTemplates( this.wikitext, [
			'Taxonbar',
			'Taxon-bar',
			'Taxobar',
			'TaxonIDs',
			'Taxon identifiers',
			'Taxon bar'
		] );

		// https://en.wikipedia.org/w/index.php?title=Special%3AWhatLinksHere&hidetrans=1&hidelinks=1&target=Template%3AAuthority+control&namespace=
		this.sectionStartPositions.authorityControl = this._lookForTemplates( this.wikitext, [
			'Authority control',
			'Normdaten',
			'Authoritycontrol',
			'External identifiers',
			'Autorité',
			'Control de autoridades',
			'전거 통제',
			'Normativna kontrola'
		] );

		// https://en.wikipedia.org/w/index.php?title=Special:WhatLinksHere/Template:Coord&hidetrans=1&hidelinks=1
		// https://en.wikipedia.org/w/index.php?title=Special:WhatLinksHere/Template:Coord_missing&hidetrans=1&hidelinks=1
		// using _findTemplateNotInsideTemplate because {{Coord}} inside infobox doesn't count
		this.sectionStartPositions.geographicalCoordinates = this._findTemplateNotInsideTemplate( this.wikitext, [
			'Coord', // coord
			'Coor',
			'Location',
			'Geocoord',
			'Geobox coor',
			'Co-ord',
			'Coord missing', // coord missing
			'No geolocation',
			'Missing coord',
			'Coords missing',
			'Locate me',
			'Needs coordinates'
		] );

		// https://en.wikipedia.org/w/index.php?title=Special:WhatLinksHere/Template:DEFAULTSORT&hidelinks=1&hidetrans=1
		this.sectionStartPositions.defaultSort = this._lookForTemplates( this.wikitext, [
			'DEFAULTSORT',
			'Default sort',
			'SORTIERUNG'
		] );

		this.sectionStartPositions.categories = this._lookForRegEx( this.wikitext, /\[\[:?Category:/i );

		this.sectionStartPositions.stubTemplates = this._lookForRegEx( this.wikitext, /\{\{[^}]*-stub\}\}/i );

		this.sectionStartPositions.improveCategories = this._lookForTemplates( this.wikitext, [ 'Improve categories' ] );

		// If the body is the same position as any of the appendices, set body to -1, since there isn't really a body, just appendices.
		const appendices = [
			this.sectionStartPositions.worksOrPublications,
			this.sectionStartPositions.seeAlso,
			this.sectionStartPositions.notesAndReferences,
			this.sectionStartPositions.furtherReading,
			this.sectionStartPositions.externalLinks
		];
		if ( this.sectionStartPositions.body !== -1 && appendices.includes( this.sectionStartPositions.body ) ) {
			this.sectionStartPositions.body = -1;
		}

		if ( this.debug ) {
			for ( const section of this.sectionOrder ) {
				const position = this._getSectionStartPosition( section );
				const chunkPreview = this.wikitext.slice( position, position + 50 );
				console.log( `${ section }: ${ position }: ${ chunkPreview }` );
			}
		}
	}

	/**
	 * To find the location of the lead of an article, determine the location of the first non-template, non-whitespace, and non-HTML comment. Non-HTML comment handles an edge case involving AFC drafts.
	 *
	 * @param {string} wikicode
	 * @return {number} Index of the first such character, or -1 if there is none.
	 */
	_getFirstNonTemplateNonWhitespace( wikicode ) {
		const length = wikicode.length;
		let nesting = 0;
		for ( let i = 0; i < length; i++ ) {
			// positional startsWith() avoids copying the remainder of the string for every character
			if ( wikicode.startsWith( '{{', i ) || wikicode.startsWith( '<!--', i ) ) {
				nesting++;
			} else if ( wikicode.startsWith( '}}', i ) || wikicode.startsWith( '->', i ) ) { // closers in this conditional must only be 2 characters wide. '->' is the tail of '-->'
				nesting--;
				i++; // skip 2nd character of the closer
			} else if ( nesting === 0 && !/\s/.test( wikicode[ i ] ) ) {
				return i;
			}
		}
		return -1;
	}

	/**
	 * Find a template that is not nested inside another template.
	 * Names are tried in array order; the position of the first NAME that is found is returned, which is not necessarily the earliest position in the text.
	 * Template names are not RegEx escaped, and only a 20 character window is inspected, so names must be at most 18 characters long.
	 *
	 * @param {string} wikicode
	 * @param {string[]} arrayOfStrings Template names, without braces.
	 * @return {number} Index of the opening {{, or -1 if none of the templates is found at nesting level 0.
	 */
	_findTemplateNotInsideTemplate( wikicode, arrayOfStrings ) {
		const length = wikicode.length;
		for ( const string of arrayOfStrings ) {
			// compile once per template name instead of once per character
			const templateStart = new RegExp( '^\\{\\{' + string, 'i' );
			let nesting = 0;
			for ( let i = 0; i < length; i++ ) {
				const chunk = wikicode.slice( i, i + 20 );
				if ( nesting === 0 && templateStart.test( chunk ) ) {
					return i;
				} else if ( chunk.startsWith( '{{' ) ) {
					nesting++;
				} else if ( chunk.startsWith( '}}' ) ) {
					nesting--;
					i++; // skip 2nd }
				}
			}
		}
		return -1;
	}

	/**
	 * Find the first occurrence of any of the given templates. Names are matched as case-insensitive prefixes
	 * (e.g. 'Pp' also finds {{Pp-protected}}). Template names are not RegEx escaped.
	 *
	 * @param {string} haystack
	 * @param {string[]} arrayOfTemplateNames Template names, without braces.
	 * @return {number} Index of the opening {{, or -1 if not found or if the list is empty.
	 */
	_lookForTemplates( haystack, arrayOfTemplateNames ) {
		if ( !arrayOfTemplateNames.length ) {
			return -1;
		}
		// space or underscore, same thing. Must be a global replace so EVERY gap in a multi-word name is covered.
		const alternatives = arrayOfTemplateNames.map( ( name ) => name.replace( /[ _]/g, '[ _]' ) );
		// don't match section maintenance tags, e.g. {{More citations needed section}} and {{More citations needed|section}}.
		// The hyphen is escaped so the class is "space, underscore, hyphen, pipe" rather than a range.
		const regExString = '\\{\\{(?:' + alternatives.join( '|' ) + ')(?![ _\\-|]section)';
		const matches = haystack.match( new RegExp( regExString, 'i' ) );
		return matches ? matches.index : -1;
	}

	/**
	 * Find the first heading (two or more = signs) whose title starts with one of the given names, case-insensitively.
	 * Heading names are not RegEx escaped.
	 *
	 * @param {string} haystack
	 * @param {string[]} arrayOfHeadingNames
	 * @return {number} Index of the first = of the heading, or -1 if not found or if the list is empty.
	 */
	_lookForHeadings( haystack, arrayOfHeadingNames ) {
		if ( !arrayOfHeadingNames.length ) {
			return -1;
		}
		const regExString = '={2,}\\s*(?:' + arrayOfHeadingNames.join( '|' ) + ')';
		const matches = haystack.match( new RegExp( regExString, 'i' ) );
		return matches ? matches.index : -1;
	}

	/**
	 * Find the first occurrence of any of the given RegEx source strings, case-insensitively. The strings are not RegEx escaped.
	 *
	 * @param {string} haystack
	 * @param {string[]} arrayOfRegExStrings
	 * @return {number} Index of the match, or -1 if not found or if the list is empty.
	 */
	_lookForStrings( haystack, arrayOfRegExStrings ) {
		if ( !arrayOfRegExStrings.length ) {
			return -1;
		}
		const regExString = '(?:' + arrayOfRegExStrings.join( '|' ) + ')';
		const matches = haystack.match( new RegExp( regExString, 'i' ) );
		return matches ? matches.index : -1;
	}

	/**
	 * @param {string} haystack
	 * @param {RegExp} regEx Must not have the g flag, because the match index is needed.
	 * @return {number} Index of the first match, or -1 if not found.
	 */
	_lookForRegEx( haystack, regEx ) {
		const matches = haystack.match( regEx );
		return matches ? matches.index : -1;
	}

	/**
	 * @param {string} wikicode
	 * @param {string} section One of the names in this.sectionOrder.
	 * @return {boolean} Whether the section is present in the wikicode.
	 * @throws {Error} If the section name is invalid.
	 */
	hasSection( wikicode, section ) {
		this.wikitext = wikicode;
		this._calculate();
		return this._getSectionStartPosition( section ) !== -1;
	}

	/**
	 * Position of the section, or, when the section is absent, the position where it would have to be inserted.
	 *
	 * @param {string} wikicode
	 * @param {string} section One of the names in this.sectionOrder.
	 * @return {number} sectionPosition
	 * @throws {Error} If the section name is invalid.
	 */
	getSectionPosition( wikicode, section ) {
		this.wikitext = wikicode;
		this._calculate();
		let position = this._getSectionStartPosition( section );
		if ( position === -1 ) {
			position = this._getPositionOfClosestSection( section );
		}
		return position;
	}

	/**
	 * @param {string} section
	 * @return {number} -1 if no section, integer if section
	 * @throws {Error} If the section name is invalid.
	 */
	_getSectionStartPosition( section ) {
		const validSection = section in this.sectionStartPositions;
		if ( !validSection ) {
			throw new Error( 'MOSOrderPositionFinder: Invalid section name.' );
		}
		return this.sectionStartPositions[ section ];
	}

	/** Useful for testing. Returns all section positions. */
	getAllSectionPositions( wikicode ) {
		this.wikitext = wikicode;
		this._calculate();
		return this.sectionStartPositions;
	}

	/** Useful for testing. Returns all section positions that exist (that aren't -1). */
	getAllExistingSectionPositions( wikicode ) {
		this.wikitext = wikicode;
		this._calculate();
		const sections = {};
		for ( const key in this.sectionStartPositions ) {
			if ( this.sectionStartPositions[ key ] !== -1 ) {
				sections[ key ] = this.sectionStartPositions[ key ];
			}
		}
		return sections;
	}

	/**
	 * If section is absent, we will guess where the section should go. Do not add whitespace, we will figure it out for you.
	 *
	 * @param {string} wikicode
	 * @param {string} needle The wikicode to insert.
	 * @param {string} section One of the names in this.sectionOrder.
	 * @return {string} The wikicode with the needle inserted.
	 * @throws {Error} If the section name is invalid.
	 */
	insertAtSection( wikicode, needle, section ) {
		this.wikitext = wikicode;

		// fix more than two enters in a row
		// this.wikitext = this.wikitext.replace(/\n{3,}/g, '\n\n');

		this._calculate();

		// throws on an invalid section name, so position is always a number from here on
		let position = this._getSectionStartPosition( section );
		let hadToCreateNewSection = false;
		if ( position === -1 ) {
			position = this._getPositionOfClosestSection( section );
			hadToCreateNewSection = true;
		}

		let topHalf = this.wikitext.slice( 0, position );
		let bottomHalf = this.wikitext.slice( position );

		// TODO: these are band aid fixes, they need a rewrite. should probably add the ideal # of blank lines beneath each section to the list of sections, and then do a foreach loop through that
		// if too much whitespace, reduce amount of whitespace
		if ( section === 'stubTemplates' ) {
			topHalf = topHalf.replace( /\n{4,}$/, '\n\n\n' );
		} else {
			topHalf = topHalf.replace( /\n{3,}$/, '\n\n' );
		}
		bottomHalf = bottomHalf.replace( /^\n{3,}/, '\n\n' );

		// make sure there is a blank line above the needle
		if ( topHalf.endsWith( '\n\n' ) ) {
			// intentionally left blank
		} else if ( topHalf.endsWith( '\n' ) ) {
			topHalf += '\n';
		} else {
			topHalf += '\n\n';
		}

		// at least a line break below the needle, and a blank line when the needle starts a new section
		if ( !bottomHalf.startsWith( '\n' ) ) {
			bottomHalf = '\n' + bottomHalf;
		}
		if ( hadToCreateNewSection && !bottomHalf.startsWith( '\n\n' ) ) {
			bottomHalf = '\n' + bottomHalf;
		}

		this.wikitext = topHalf + needle + bottomHalf;

		if ( section === 'shortDescription' ) {
			// if template beneath the insertion point, don't put a blank line between SD and other template
			this.wikitext = this.wikitext.replace( /(\{\{(?:Short description|Shortdesc|Shortdescription|Short desc)\|[^}]+\}\}\n)\n(\{\{)/is, '$1$2' );
		}

		// this.wikitext = this.wikitext.trim();
		return this.wikitext;
	}

	/**
	 * https://stackoverflow.com/a/13109786/3480193
	 *
	 * @param {Array} arr
	 * @param {*} val Compared with strict equality.
	 * @return {number|false} Index of the first occurrence of val, or false if it is not in the array.
	 */
	_arraySearch( arr, val ) {
		const index = arr.indexOf( val );
		return index === -1 ? false : index;
	}

	/**
	 * For a section that is absent, find the position of the next section (in MOS order) that is present.
	 * The 'bottom' section is always present, so a position is always found for a valid section name.
	 *
	 * @param {string} section One of the names in this.sectionOrder.
	 * @return {number}
	 */
	_getPositionOfClosestSection( section ) {
		const sectionKey = this._arraySearch( this.sectionOrder, section );
		// scan until you find a section that is not -1
		// can scan in either direction. I chose to scan down.
		for ( let i = sectionKey; i < this.sectionOrder.length; i++ ) {
			const sectionKey2 = this.sectionOrder[ i ];
			const sectionPosition = this.sectionStartPositions[ sectionKey2 ];
			if ( sectionPosition !== -1 ) {
				return sectionPosition;
			}
		}
	}
}
// === modules/functions.js ======================================================
/* eslint-disable */
/* eslint-enable */
/* eslint-disable security/detect-bidi-characters */
// TODO: convert to class. name it SHUtil or SpeciesHelperUtil or something. or move all these to main and make a giant SpeciesHelperController class, then extract some classes out of that
/**
 * Fetch the wikitext of one revision from the English Wikipedia parse API.
 *
 * @param {number|string} diffID Revision ID, sent as the API's oldid parameter.
 * @return {Promise<string>} The revision's wikitext, or '' when the current page has no current revision ID (e.g. deleted page).
 */
async function getWikicodeOfDiff( diffID ) {
	// a page without a current revision ID has nothing to fetch
	if ( !mw.config.get( 'wgCurRevisionId' ) ) {
		return '';
	}

	const revisionParam = encodeURIComponent( diffID );
	const requestUrl = 'https://en.wikipedia.org/w/api.php?action=parse&oldid=' + revisionParam + '&prop=wikitext&formatversion=2&format=json';

	let fetchedWikitext = '';
	await $.ajax( {
		url: requestUrl,
		dataType: 'json',
		success: ( apiResult ) => {
			fetchedWikitext = apiResult.parse.wikitext;
		}
	} );
	return fetchedWikitext;
}
/**
 * Search Wikidata (English labels) for a title and return the ID of the top search hit.
 *
 * @param {string} title Text to search for.
 * @return {Promise<string>} ID of the first search result, or '' when the search returns nothing.
 */
async function getWikidataID( title ) {
	const wikidataApi = new mw.ForeignApi( 'https://www.wikidata.org/w/api.php' );
	const searchQuery = {
		action: 'wbsearchentities',
		format: 'json',
		search: title,
		language: 'en'
	};
	const { search: hits } = await wikidataApi.get( searchQuery );
	return hits.length ? hits[ 0 ].id : '';
}
/**
 * Ask the wiki to expand {{#invoke:Autotaxobox|listAll|genus}} and return the resulting text.
 * Getting tree of life via API notes: https://en.wikipedia.org/wiki/Wikipedia_talk:Automated_taxobox_system#Family_for_user_script - {{#invoke:Autotaxobox|listAll|Bellis}} → Bellis-genus, Astereae-tribus, Asterodae-supertribus, etc.
 *
 * @param {string} genus
 * @return {Promise<string>} The expanded wikitext, or '' when the expansion is just a single "Name-" entry with no rank.
 */
async function getTaxa( genus ) {
	const expansionQuery = {
		action: 'expandtemplates',
		format: 'json',
		text: `{{#invoke:Autotaxobox|listAll|${ genus }}}`,
		prop: 'wikitext'
	};
	const { expandtemplates } = await new mw.Api().get( expansionQuery );
	const taxa = expandtemplates.wikitext;

	// when taxonomy template is missing, it will return something like Genus-
	const taxonomyIsMissing = taxa.match( /^[^-]+-$/i );
	return taxonomyIsMissing ? '' : taxa;
}
/**
 * Check which of the given pages exist on this wiki.
 *
 * @param {string[]} listOfPages Page titles, including namespace.
 * @return {Promise<string[]>} Titles (as returned by the API) of the pages that exist. The order is the
 *   API response's order, not the order of listOfPages.
 */
async function doPagesExist( listOfPages ) {
	// nothing to look up; also avoids an API response that has no "query" member
	if ( !listOfPages.length ) {
		return [];
	}

	const api = new mw.Api();
	const response = await api.get( {
		action: 'query',
		format: 'json',
		prop: 'revisions',
		titles: listOfPages.join( '|' ) // | is an illegal title character, so no need to escape it
	} );

	const responsePages = ( response.query && response.query.pages ) || {};
	const listOfLivePages = [];
	for ( const [ key, value ] of Object.entries( responsePages ) ) {
		// The response is keyed by page ID. Only positive IDs are counted as existing pages.
		// A key that does not parse as a number gives NaN, and NaN > 0 is false, so no separate NaN check is needed.
		if ( Number.parseInt( key, 10 ) > 0 ) {
			listOfLivePages.push( value.title );
		}
	}
	return listOfLivePages;
}
/**
 * Backslash-escape every RegExp metacharacter so the string can be embedded literally in a RegExp source.
 *
 * @param {string} string
 * @return {string}
 */
function regExEscape( string ) {
	const metaCharacters = /[.*+?^${}()|[\]\\]/g;
	return string.replace( metaCharacters, ( metaCharacter ) => '\\' + metaCharacter );
}
/**
 * Leave the current page by POSTing the new wikicode to index.php?action=submit with the "Show changes" parameters set.
 *
 * @param {string} titleWithNamespaceAndUnderscores Page title; it is URI-encoded before being put in the form's action URL.
 * @param {string} wikicode Value for the wpTextbox1 field.
 * @param {string} editSummaryItems Value for the wpSummary field.
 */
function goToShowChangesScreen( titleWithNamespaceAndUnderscores, wikicode, editSummaryItems ) {
	const titleEncoded = encodeURIComponent( titleWithNamespaceAndUnderscores );
	const baseURL = mw.config.get( 'wgServer' ) + mw.config.get( 'wgScriptPath' ) + '/';

	// https://stackoverflow.com/a/12464290/3480193
	const $form = $( `<form action="${ baseURL }index.php?title=${ titleEncoded }&action=submit" method="POST"/>` );

	// hidden inputs, appended in this order
	const hiddenFields = [
		[ 'wpTextbox1', wikicode ],
		[ 'wpSummary', editSummaryItems ],
		[ 'mode', 'preview' ],
		[ 'wpDiff', 'Show changes' ],
		[ 'wpUltimateParam', '1' ]
	];
	for ( const [ fieldName, fieldValue ] of hiddenFields ) {
		$form.append( $( `<input type="hidden" name="${ fieldName }">` ).val( fieldValue ) );
	}

	$form
		.appendTo( $( document.body ) ) // it has to be added somewhere into the <body>
		.trigger( 'submit' );
}
/**
 * Decide whether SpeciesHelper should be offered on the current page.
 * Diff views are deliberately not excluded.
 *
 * @param {string} title Page title with namespace and underscores. Only consulted outside mainspace/draftspace.
 * @return {boolean} True when viewing an existing page that is in mainspace, in draftspace, or whose title starts with User:Novem_Linguae/sandbox.
 */
function shouldRunOnThisPage( title ) {
	// don't run when not viewing articles (editing, history, etc.)
	const isViewing = mw.config.get( 'wgAction' ) === 'view';
	// pages without a current revision ID (e.g. deleted pages) have nothing to work on
	const hasCurrentRevision = Boolean( mw.config.get( 'wgCurRevisionId' ) );
	if ( !isViewing || !hasCurrentRevision ) {
		return false;
	}

	// Only run in mainspace (0) or draftspace (118), plus the author's sandbox pages
	const namespaceNumber = mw.config.get( 'wgNamespaceNumber' );
	const isMainspaceOrDraftspace = namespaceNumber === 0 || namespaceNumber === 118;
	return isMainspaceOrDraftspace || title.startsWith( 'User:Novem_Linguae/sandbox' );
}
/**
 * Build the list of stub templates and categories whose existence should be checked, deepest taxon first.
 *
 * @param {string[]} taxa Taxon names, deepest (genus) first. Only the first 20 are used.
 * @param {Object} listOfNonLatinSpeciesCategories Map of latin category name -> preferred non-latin category name (both without "Category:").
 * @return {string[]} Page titles of the form "Template:X-stub" and "Category:X".
 */
function getPagesToCheck( taxa, listOfNonLatinSpeciesCategories ) {
	const depthToCheck = 20; // there's an API limit on the # of pages you can check. possible to get around this with multiple API calls, if needed.
	let pagesToCheck = [];

	// Check several levels of these
	let positionInTree = 0;
	for ( const rawTaxon of taxa.slice( 0, depthToCheck ) ) {
		positionInTree++;

		// handle edge case "Incertae sedis" or "Incertae sedis/something"
		const taxon = rawTaxon.replace( 'Incertae sedis', '' ).replace( '/', '' );
		if ( !taxon ) {
			continue;
		}

		// stub, then category
		pagesToCheck.push( `Template:${ taxon }-stub`, `Category:${ taxon }` );

		// some genus categories have disambugators at the end, such as (genus) or (plant). check for this edge case
		const isFirstTaxon = positionInTree === 1;
		if ( isFirstTaxon ) {
			pagesToCheck.push( `Category:${ taxon } (genus)` );
		}

		// skip {{Viola-stub}}, false positive (is for the instrument, not the genus)
		pagesToCheck = deleteFromArray( 'Template:Viola-stub', pagesToCheck );
	}

	// Replace any latin stubs that have non-latin equivalents, with the non-latin stub
	const listOfNonLatinSpeciesStubs = {
		// SENTENCE CASE. Left side spaces, right side dashes. Usually singular.
		// latin (deprecated) -> non-latin (preferred)
		// '': 'Abyssochrysidae', // TODO: make Template:Taxonomy
		Acanthocephala: 'Acanthocephalan',
		// '': 'Agonoxeninae', // "disputed"
		// '': 'Alucitoidea', // Template:Taxonomy has a /? after it
		Ammonoidea: 'Ammonite',
		// '': 'Ancylonotini',
		Anomodontia: 'Anomodont',
		'Anthiinae (beetle)': 'AnthiinaeBeetle',
		Archosauria: 'Archosaur',
		Arthropoda: 'Arthropod',
		// '': 'Australia-asterid',
		// '': 'Australia-eudicot',
		// '': 'Australia-plant',
		// '': 'Australia-rosid',
		// '': 'Autostichinae', // TODO: make Template:Taxonomy
		Bambusoideae: 'Bamboo',
		Bryophyta: 'Bryophyte',
		Chiroptera: 'Bat',
		Anthophila: 'Bee',
		// '': 'Bicyclus', // TODO: make Template:Taxonomy
		Bikonta: 'Bikont',
		Osteichthyes: 'Bony-fish',
		Brachiopoda: 'Brachiopod',
		// '': 'Bradina', // TODO: make Template:Taxonomy
		// '': 'Bryophyte', // proposed, polyphyletic
		Bryozoa: 'Bryozoan',
		// '': 'Buccinulidae', // TODO: make Template:Taxonomy
		Rhopalocera: 'Butterfly',
		// '': 'Cabniini', // TODO: make Template:Taxonomy
		Gymnophiona: 'Caecilian',
		// '': 'Caliphyllidae', // disputed
		// '': 'Canid',
		// '': 'Carposina',
		// '': 'Ceromitia',
		// '': 'Cettiidae',
		Chamaeleonidae: 'Chameleon',
		// '': 'Chilinidae',
		Chilodontaidae: 'Chilodontidae-gastropod',
		// '': 'Chlamydephoridae',
		// '': 'Chordate',
		// '': 'Ciliate',
		// '': 'Cochlicopidae',
		Colubridae: 'Colubrids',
		Pinophyta: 'Conifer',
		// '': 'Conodont',
		Copepoda: 'Copepod',
		Crustacea: 'Crustacean',
		// '': 'Cryptoblabini',
		Sepiida: 'Cuttlefish',
		// '': 'Cycad',
		Zygoptera: 'Damselfly',
		Decapoda: 'Decapod',
		// '': 'Depressaria',
		// '': 'Dialidae',
		// '': 'Diatom',
		Dinoflagellata: 'Dinoflagellate',
		Dinosauria: 'Dinosaur',
		Diprotodontia: 'Diprotodont',
		Dermaptera: 'Earwig',
		// '': 'Eatoniellidae',
		// '': 'Eburodacrys',
		Echinodermata: 'Echinoderm',
		// '': 'Egesina',
		// '': 'Elaphidiini',
		// '': 'Eoophyla',
		// '': 'Erinaceomorpha',
		Eukaryota: 'Eukaryote',
		// '': 'Eventoedungulate',
		Fabaceae: 'Fabaceae-tree',
		Felidae: 'Feline',
		Polypodiophyta: 'Fern',
		Lampyridae: 'Firefly',
		Platyhelminthes: 'Flatworm',
		Flavobacteriia: 'Flavobacteria',
		Siphonaptera: 'Flea',
		Pteropodoidea: 'Fruit-bat',
		// '': 'Glipa',
		// '': 'Glottulinae',
		Poaceae: 'Grass',
		Marmotini: 'Ground-squirrel',
		// '': 'Haplotrematidae',
		// '': 'Hemichordate',
		// '': 'Heterokont',
		// '': 'Heteropsini',
		// '': 'Heteropsis',
		// '': 'Hydrolase',
		// '': 'Hypercalliinae',
		// '': 'Hypoptinae',
		// '': 'Ichthyosaur',
		// '': 'Isomerase',
		// '': 'Jordanoleiopus',
		// '': 'Lactobacilli',
		Lagomorpha: 'Lagomorph',
		// '': 'Lamprosema',
		Phyllostomidae: 'Leafnosed-bat',
		// '': 'Leptostylus',
		// '': 'Lepturges',
		// '': 'Ligase',
		Sarcopterygii: 'Lobefinned-fish',
		Phthiraptera: 'Louse',
		// '': 'Lyase',
		Lycophytes: 'Lycophyte',
		// '': 'Macrosphenidae',
		Magnoliids: 'Magnoliid',
		Mammalia: 'Mammal',
		Marsupialia: 'Marsupial',
		// '': 'Megomphicidae',
		// '': 'Methiini',
		// '': 'Miaenia',
		Mollusca: 'Mollusc',
		Simiiformes: 'Monkey',
		// '': 'Muroid',
		// '': 'Mythimnini',
		// '': 'Nacoleia',
		// '': 'Neoibidionini',
		// '': 'Netechma',
		Platyrrhini: 'Newworld-monkey',
		// '': 'Nitrobacteraceae',
		// '': 'Nymphicula',
		// '': 'Nyssodrysternum',
		// '': 'Obtortionidae',
		// '': 'Oddtoedungulate',
		// '': 'Oemini',
		Cercopithecoidea: 'Oldworld-monkey',
		// '': 'Olivellidae',
		// '': 'Opisthokont',
		Orchidaceae: 'Orchid',
		// '': 'Oreodera',
		// '': 'Oreohelicidae',
		// '': 'Ornithischian',
		// '': 'Ornithology',
		// '': 'Orthocomotis',
		// '': 'Ostracod',
		// '': 'Oxidoreductase',
		// '': 'Paracles',
		// '': 'Parasite-insect',
		// '': 'Parasitic-SAR',
		// '': 'Parornix',
		// '': 'Passerine',
		// '': 'Pediculariidae',
		// '': 'Permian-reptile',
		// '': 'Phyllobacteriaceae',
		// '': 'Piezocerini',
		// '': 'Piletocera',
		// '': 'Placobranchidae',
		// '': 'Placoderm',
		// '': 'Plesiosaur',
		// '': 'Poriferan',
		Eumeninae: 'Potter-wasp',
		Primates: 'Primate',
		// '': 'Prionapterygini',
		// '': 'Procerithiidae',
		// '': 'Propionibacterineae',
		// '': 'Prosoplus',
		// '': 'Protostome',
		// '': 'Psapharochrus',
		// '': 'Psaphidinae',
		// '': 'Pseudolividae',
		// '': 'Pseudonocardineae',
		Pterosauria: 'Pterosaur',
		// '': 'Pyramidulidae',
		Pyrausta: 'Pyrausta (moth)',
		// '': 'Rasboras',
		// '': 'Remizidae',
		Rodentia: 'Rodent',
		Rosids: 'Rosid',
		// '': 'Rotifer',
		Urodela: 'Salamander',
		// '': 'Saurita',
		// '': 'Sauropodomorph',
		Symphyta: 'Sawfly',
		// '': 'Scaliolidae',
		// '': 'Scaphopod',
		Scorpiones: 'Scorpion',
		Selachimorpha: 'Shark',
		Soricidae: 'Shrew',
		// '': 'Siliquariidae',
		// '': 'Siphonariidae',
		Serpentes: 'Snake',
		// '': 'Solenogaster',
		// '': 'Spirochaetae',
		// '': 'Springtail',
		Sciuridae: 'Squirrel',
		// '': 'Stenalia',
		// '': 'Stenothyridae',
		// '': 'Stictopterinae',
		// '': 'Strepsimaninae',
		// '': 'Strobilopsidae',
		// '': 'Subulinidae',
		Cygnus: 'Swan',
		// '': 'Synapsid',
		// '': 'Tardigrade',
		// '': 'Teliomycotina',
		// '': 'Therapsid',
		// '': 'Theropod',
		// '': 'Thysanura',
		// '': 'Trifurcula',
		// '': 'Trigonochlamydidae',
		// '': 'Trilobite',
		// '': 'Truncatellidae',
		Oomycota: 'Watermould',
		// '': 'Zetaproteobacteria', // TODO: create Template:Taxonomy
		Acariformes: 'Acari', // mites and ticks part 1
		Acoelomorpha: 'Xenacoelomorpha', // redirects to this parent taxon
		Actinopterygii: 'Rayfinned-fish',
		Amphibia: 'Amphibian',
		Amphipoda: 'Amphipod',
		Anatidae: 'Duck',
		Animalia: 'Animal',
		Anisoptera: 'Dragonfly',
		Annelida: 'Annelid',
		// 'Apocrita': 'Wasp', // polyphyletic
		Arachnida: 'Arachnid',
		Araneae: 'Spider',
		Arecaceae: 'Palm',
		'Alsophila (plant)': 'Alsophila-plant',
		Astacoidea: 'Crayfish',
		Asterids: 'Asterid',
		Aves: 'Bird',
		Bivalvia: 'Bivalve',
		Blattodea: 'Cockroach',
		Brachyura: 'Crab',
		Bromeliaceae: 'Bromeliad',
		Cactaceae: 'Cactus',
		Cephalopoda: 'Cephalopod',
		Cnidaria: 'Cnidarian',
		Crocidurinae: 'Whitetoothed-shrew',
		Coffea: 'Coffee',
		Coleoptera: 'Beetle',
		Ctenophora: 'Ctenophore',
		Curculionoidea: 'Weevil',
		Decapodiformes: 'Squid',
		Demospongiae: 'Demosponge',
		Ephemeroptera: 'Mayfly',
		Formicidae: 'Ant',
		Fungi: 'Fungus',
		Gastropoda: 'Gastropod',
		Gekkota: 'Gecko',
		Heterocera: 'Moth',
		Insecta: 'Insect',
		Isopoda: 'Isopod',
		Isoptera: 'Termite',
		'Mimosoid clade': 'Mimosoideae', // this one is weird. intentional though
		Monocots: 'Monocot',
		Myrmeleontidae: 'Antlion',
		Nematoda: 'Nematode',
		Nemertea: 'Nemertean',
		Octopoda: 'Octopus',
		Onychophora: 'Onychophore',
		Parasitiformes: 'Acari', // mites and ticks part 2
		Parastacoidea: 'Crayfish',
		Picidae: 'Woodpecker',
		Plantae: 'Plant',
		Psittaciformes: 'Parrot',
		Reptilia: 'Reptile',
		Salticidae: 'Jumping-spider',
		Scincidae: 'Skink',
		Siluriformes: 'Catfish',
		Soricinae: 'Redtoothed-shrew',
		// 'Squamata': 'Lizard', // paraphyletic
		Testudines: 'Turtle',
		Thysanoptera: 'Thrips',
		Trochilidae: 'Hummingbird'
	};

	// Swap every page named prefix + latinName + suffix for prefix + preferredName + suffix, in place.
	// Map entries are applied one after another in the map's own order.
	const swapInPreferredNames = ( prefix, suffix, latinToPreferred ) => {
		for ( const latinName in latinToPreferred ) {
			const latinPage = prefix + latinName + suffix;
			for ( let position = 0; position < pagesToCheck.length; position++ ) {
				if ( pagesToCheck[ position ] === latinPage ) {
					pagesToCheck[ position ] = prefix + latinToPreferred[ latinName ] + suffix;
				}
			}
		}
	};

	swapInPreferredNames( 'Template:', '-stub', listOfNonLatinSpeciesStubs );

	// Replace any latin categories that have non-latin equivalents, with the non-latin categories
	swapInPreferredNames( 'Category:', '', listOfNonLatinSpeciesCategories );

	return pagesToCheck;
}
/**
 * Pick the first page of the requested type out of a list of page titles, and strip its namespace.
 *
 * @param {string[]} listOfPages Page titles such as 'Category:Foo', 'Template:Foo' or 'Template:Foo-stub'. The deepest taxon is expected to come first.
 * @param {string} type 'category', 'navbox' or 'stub'. Any other value leaves the list unfiltered.
 * @return {string} The first title of that type with its first 'Template:' / 'Category:' removed, or '' if there is none
 */
function parseListOfPages( listOfPages, type ) {
	// what a title must look like to count as each type. a navbox is any template that is not a stub template
	const patternsByType = {
		category: /^Category:.*$/i,
		navbox: /^Template:.*(?<!-stub)$/i,
		stub: /^Template:.*-stub$/i
	};
	const isKnownType = Object.prototype.hasOwnProperty.call( patternsByType, type );
	const candidates = isKnownType ?
		listOfPages.filter( ( title ) => title.match( patternsByType[ type ] ) !== null ) :
		listOfPages;
	// the entry closest to the beginning of the list is the deepest taxon that was found
	const deepestTaxonPage = candidates[ 0 ] || '';
	// get rid of Template: and Category:
	return deepestTaxonPage.replace( /(?:Template:|Category:)/i, '' );
}
/**
 * Keep only the category pages from a list of page titles.
 *
 * @param {string[]} listOfPages Page titles, with namespace
 * @return {string[]} New array holding the titles that match /^Category:.*$/ (case sensitive), in their original order
 */
function getAllTaxaCategories( listOfPages ) {
	const categoryTitle = /^Category:.*$/;
	return listOfPages.filter( ( title ) => title.match( categoryTitle ) !== null );
}
/**
 * Puts a scrambled array back in order. Running the API query scrambles the order of its results.
 *
 * @param {Array} correctOrder The bigger array, in the desired order
 * @param {Array} incorrectOrder The smaller array, scrambled
 * @return {Array} The entries of correctOrder that also occur in incorrectOrder, in correctOrder's order
 */
function fixArrayOrder( correctOrder, incorrectOrder ) {
	const wasReturned = ( entry ) => incorrectOrder.some( ( returnedEntry ) => returnedEntry === entry );
	return correctOrder.filter( ( entry ) => wasReturned( entry ) );
}
// TODO: write unit test for this function. maybe move it to a class
/**
 * Roughly count the words of prose in a piece of wikicode. Templates, images, HTML comments,
 * HTML tags, categories, and wiki formatting are stripped before counting.
 *
 * @param {string} wikicode
 * @return {number} Number of \w+ runs left after stripping. 0 if nothing is left.
 */
function countWords( wikicode ) {
	// convert {{Blockquote}} to text
	wikicode = wikicode.replace( /\{\{Blockquote\s*\|([^}]*)\}\}/g, '$1' );
	// strip templates
	// TODO: handle nested templates
	wikicode = wikicode.replace( /\{\{.*?\}\}/gsi, '' );
	// strip images
	wikicode = wikicode.replace( /\[\[File:.*?\]\]/gsi, '' );
	// strip HTML comments
	wikicode = wikicode.replace( /<!--.*?-->/gsi, '' );
	// strip HTML tags and refs
	wikicode = wikicode.replace( /<.*?.*?\/.*?>/gsi, '' );
	// strip heading formatting
	wikicode = wikicode.replace( / {0,}=={1,} {0,}/gsi, '' );
	// strip categories
	wikicode = wikicode.replace( /\[\[:?Category:.*?\]\]/gsi, '' );
	// handle piped wikilinks
	// TODO: handle nested brackets (for example, a wikilink as an image caption)
	wikicode = wikicode.replace( /\[\[[^\]]+\|([^\]]+)\]\]/gsi, '$1' );
	// handle simple wikilinks
	wikicode = wikicode.replace( /\[\[/g, '' ).replace( /\]\]/g, '' );
	// strip bold and italics
	wikicode = wikicode.replace( /'{2,}/g, '' );
	// consolidate whitespace
	wikicode = wikicode.replace( /\s+/gsi, ' ' );
	// non-breaking space to space. The &nbsp; entity has to be named explicitly, otherwise "nbsp" is counted as a word
	wikicode = wikicode.replace( /&nbsp;|\u00A0/gi, ' ' );
	// In one of my test cases, there was a }} that didn't get deleted. But this is not detected by \w+, so no need to worry about it.
	wikicode = wikicode.trim();
	// match() returns null instead of an empty array when there are no words at all
	const words = wikicode.match( /\w+/g );
	return words === null ? 0 : words.length;
}
/**
 * Check whether two wikicodes only differ in newlines, letter case, or leading/trailing whitespace.
 *
 * @param {string} wikicode
 * @param {string} wikicode2
 * @return {boolean} true if both are identical once newlines are removed and the text is lowercased and trimmed
 */
function isMinorChange( wikicode, wikicode2 ) {
	const normalize = ( text ) => text.replace( /\n/g, '' ).toLowerCase().trim();
	const normalizedAfter = normalize( wikicode2 );
	const normalizedBefore = normalize( wikicode );
	return normalizedAfter === normalizedBefore;
}
/**
 * Check whether two arrays hold the same values, ignoring letter case and order.
 * Neither input array is modified.
 *
 * @param {Array|null} array1
 * @param {Array|null} array2
 * @return {boolean} true if both are null, or if both are arrays with the same values (duplicates count). false otherwise.
 */
function arraysHaveSameValuesCaseInsensitive( array1, array2 ) {
	if ( array1 === null || array2 === null ) {
		// equal only when both are null
		return array1 === array2;
	}
	if ( array1.length !== array2.length ) {
		return false;
	}
	// Lowercase BEFORE sorting, else 'B' sorts ahead of 'a' and equal sets compare as different.
	// map() makes a copy, so sort() does not reorder the caller's array.
	const normalize = ( array ) => array.map( ( value ) => String( value ).toLowerCase() ).sort();
	const normalized1 = normalize( array1 );
	const normalized2 = normalize( array2 );
	// compare element by element, so that a value containing a comma cannot be mistaken for two values
	return normalized1.every( ( value, index ) => value === normalized2[ index ] );
}
/**
 * Convert a comma-separated taxa string into an array of bare taxon names.
 *
 * @param {string} taxa For example 'Homo-Genus, Plantae/Foo-Regnum, Life-'
 * @return {string[]} For example [ 'Homo', 'Plantae' ]
 */
function taxaStringToArray( taxa ) {
	return taxa
		// get rid of "Life" at the end
		.replace( ', Life-', '' )
		// one array entry per taxon
		.split( ', ' )
		// chop everything from the first dash or slash onwards. gets rid of both -Genus and /Plantae
		.map( ( taxon ) => taxon.replace( /[-/].*?$/, '' ) );
}
/**
 * Delete every {{...-stub}} template that is directly followed by a newline, along with that newline.
 *
 * @param {string} wikicode
 * @return {string}
 */
function deleteAllStubs( wikicode ) {
	const stubTemplateAndNewline = /\{\{[^}]*-stub\}\}\n/gi;
	return wikicode.replace( stubTemplateAndNewline, '' );
}
/**
 * @param {string} titleWithNamespaceAndUnderscores
 * @return {boolean} true if the title contains "sandbox" anywhere, in any letter case
 */
function isSandbox( titleWithNamespaceAndUnderscores ) {
	return titleWithNamespaceAndUnderscores.search( /sandbox/i ) !== -1;
}
/**
 * Ask the API for the oldest revision of a page, and return the date of that revision.
 *
 * @param {string} title
 * @return {Promise<string>} The first 10 characters of the revision timestamp, for example '2015-09-30'
 */
async function getPageCreationDate( title ) {
	const oldestRevisionQuery = {
		action: 'query',
		format: 'json',
		prop: 'revisions',
		titles: title,
		// rvdir newer + rvlimit 1 = only the very first revision
		rvlimit: '1',
		rvdir: 'newer'
	};
	const { query } = await new mw.Api().get( oldestRevisionQuery );
	const oldestRevision = getFirstValueInObject( query.pages ).revisions[ 0 ];
	// timestamp looks like 2015-09-30T17:28:35Z. keep the date, chop off the time
	return oldestRevision.timestamp.slice( 0, 10 );
}
/**
 * @param {Object} obj
 * @return {*} The value stored under the first key that Object.keys() lists. undefined for an object without keys.
 */
function getFirstValueInObject( obj ) {
	const [ firstKey ] = Object.keys( obj );
	return obj[ firstKey ];
}
// This function isn't used anymore. Its unit test used to fail in CI but not locally, presumably because the
// old code mixed local-time getters with toISOString() (which is always UTC), so the result depended on the
// timezone of the machine running it. TODO: re-enable the unit test.
/**
 * Get the calendar date exactly one year before the given moment. All arithmetic is done in UTC,
 * so the result is the same no matter what timezone the code runs in.
 *
 * @param {Date} today
 * @return {string} YYYY-MM-DD. February 29 rolls over to March 1 when the previous year is not a leap year.
 */
function getDateOneYearAgo( today ) {
	const lastYear = new Date( Date.UTC(
		today.getUTCFullYear() - 1,
		today.getUTCMonth(),
		today.getUTCDate()
	) );
	// https://stackoverflow.com/a/29774197/3480193
	return lastYear.toISOString().split( 'T' )[ 0 ];
}
/**
 * In a speciesbox, if |species= starts with the same word as |genus=, delete that word from |species=.
 * For example, |genus=Homo |species=Homo sapiens becomes |genus=Homo |species=sapiens.
 *
 * @param {string} wikicode2
 * @return {string} The corrected wikicode, or the input unchanged if there is nothing to correct
 */
function fixSpeciesParameterThatContainsGenus( wikicode2 ) {
	if ( !getSpeciesBox( wikicode2 ) ) {
		return wikicode2;
	}
	const genusMatch = wikicode2.match( /\|\s*genus\s*=\s*([A-Za-z ()]+?)\s*[<\n|}]/ );
	const speciesMatch = wikicode2.match( /\|\s*species\s*=\s*([A-Za-z ()]+?)\s*[<\n|}]/ );
	if ( !genusMatch || !speciesMatch ) {
		return wikicode2;
	}
	const genus = genusMatch[ 1 ];
	const [ firstWordOfSpecies ] = speciesMatch[ 1 ].split( ' ' );
	if ( genus !== firstWordOfSpecies ) {
		return wikicode2;
	}
	// drop the first word (and the space after it) from the species parameter
	return wikicode2.replace( /(\|\s*species\s*=\s*)([A-Za-z()]+ )/, '$1' );
}
/**
 * Look for the opening of a {{Speciesbox}} or {{Species box}} template (case insensitive).
 *
 * @param {string} wikicode2
 * @return {Array|null} The regex match, or null if there is no speciesbox
 */
function getSpeciesBox( wikicode2 ) {
	const speciesBoxOpening = /\{\{(?:Speciesbox|Species[ _]box)/i;
	return wikicode2.match( speciesBoxOpening );
}
/**
 * Delete every {{Italic title}} template (and one newline after it), but only if the page has a speciesbox.
 *
 * @param {string} wikicode2
 * @return {string}
 */
function removeItalicTitleIfSpeciesBoxPresent( wikicode2 ) {
	if ( !getSpeciesBox( wikicode2 ) ) {
		return wikicode2;
	}
	const italicTitleTemplate = /\{\{(?:Italic[ _]?title)[^}]*\}\}\n?/gsi;
	return wikicode2.replace( italicTitleTemplate, '' );
}
/**
 * Replace the first <references /> tag with {{Reflist}}.
 *
 * @param {string} wikicode2
 * @return {string} The input unchanged if it has no <references /> tag
 */
function replaceReferencesWithReflist( wikicode2 ) {
	// replace() leaves the string alone when nothing matches, so no need to check for a match first
	return wikicode2.replace( /<references ?\/>/i, '{{Reflist}}' );
}
/**
 * Delete the first occurrence of a value from an array. The array is modified in place.
 *
 * @param {*} needle
 * @param {Array} haystack
 * @return {Array} The same array object that was passed in
 */
function deleteFromArray( needle, haystack ) {
	const position = haystack.indexOf( needle );
	const wasFound = position !== -1;
	if ( wasFound ) {
		haystack.splice( position, 1 );
	}
	return haystack;
}
/**
 * List the categories that are linked in the wikicode, with or without a leading colon.
 * Sort keys (the part after the pipe) are not included.
 *
 * @param {string} wikicode2
 * @return {string[]|null} ['Category:test1', 'Category:test2', etc.] if found, or null if none
 */
function getListOfCategoriesFromWikitext( wikicode2 ) {
	// the lookbehind and lookahead keep the brackets and the sort key out of the match
	const categoryTitle = /(?<=\[\[:?)Category:.+?(?=\]|\|)/gi;
	const categories = wikicode2.match( categoryTitle );
	return categories;
}
/**
 * Try to build a {{Short description}} template out of the article's "... is a species of ..." sentence.
 * The wikicode is stripped of markup, cut down to the text right after "is a species of", truncated at
 * the first punctuation / conjunction / preposition, and then reduced to its last word.
 *
 * @param {string} wikicode2 Wikicode of the article
 * @param {string[]} [disallowedList] Strings that may not occur in the description (the comment below calls them taxa). Matched case insensitively. Empty strings are ignored.
 * @return {string} '{{Short description|Species of ...}}', or '' if no suitable description could be made
 */
function suggestShortDescriptionFromWikicode( wikicode2, disallowedList = [] ) {
// delete quotation marks
wikicode2 = wikicode2.replace( /"/g, '' );
// delete brackets, including the first part of the pipe
// TODO: handle nested brackets (for example, a wikilink as an image caption)
wikicode2 = wikicode2.replace( /\[\[(?:[^|\]]+\|)?([^\]]+)\]\]/g, '$1' );
// delete templates
// TODO: handle nested templates
wikicode2 = wikicode2.replace( /\{\{.*?\}\}/gs, '' );
// delete <ref></ref>
wikicode2 = wikicode2.replace( /<ref[^<]+<\/ref>/gis, '' );
// delete <ref />
wikicode2 = wikicode2.replace( /<ref[^/]+\/>/gi, '' );
// delete <!-- comments -->
wikicode2 = wikicode2.replace( /<!--.*?-->/gs, '' );
// exit if "is a species of" not found
const hasSpeciesPhrase = wikicode2.match( / is a \[?\[?species\]?\]? of /i );
if ( !hasSpeciesPhrase ) {
return '';
}
// chop everything before and including "is a species of "
wikicode2 = wikicode2.replace( /^.*?is a \[?\[?species\]?\]? of /is, '' );
// delete bold and italic formatting, without deleting their encasing word
wikicode2 = wikicode2.replace( /'{2,}/g, '' );
// delete anything after punctuation, including the punctuation. except punctuation that occurs mid-sentence, such as dash
wikicode2 = wikicode2.replace( /[~!@#$%^&*()_+`=\\\][{}|;':",./<>?].*$/s, '' );
// chop certain adjectives that just make the short description longer and aren't that helpful
wikicode2 = wikicode2.replace( /(?:nocturnal|strepsirrhine|marine|small to medium)/gi, '' );
// fix double spacing issues caused by above replace
wikicode2 = wikicode2.replace( / {2,}/gi, ' ' );
// delete anything after certain conjunctions (and similar words that start a new clause), including the word itself
wikicode2 = wikicode2.replace( / (?:And|Belonging|Commonly|Described|Discovered|Endemic|Found|Known|Native|Observed|Occurring|That|which).*$/is, '' );
// delete anything after the first encountered preposition, including the preposition
wikicode2 = wikicode2.replace( / (?:Aboard|About|Above|According to|Across|After|Against|Ago|Ahead|Along|Along with|Alongside|Amid|Among|Anti|Apart from|Around|As|As for|As per|As to|As well as|Aside from|Astride|At|Atop|Away|Bar|Barring|Because of|Before|Behind|Below|Beneath|Beside|Besides|Between|Beyond|But|By|Circa|Close to|Concerning|Considering|Contrary to|Counting|Depending on|Despite|Down|Due to|During|Except|Except for|Excepting|Excluding|Far|Following|For|Forward of|From|Further to|Given|Gone|In|Including|Inside|Instead of|Into|Less|Like|Minus|Near|Near to|Next to|Notwithstanding|Of|On|Opposite|Other than|Out|Over|Owing to|Past|Pending|Per|Plus|Prior to|Pro|Re|Regarding|Regardless of|Round|Save|Save for|Saving|Since|Than|Thanks to|Through|Throughout|till|To|Together with|Toward|towards|Under|Underneath|Unlike|Until|Up|Versus|Via|With|Within|Worth)[.,!? ].*$/is, '' );
// trim
wikicode2 = wikicode2.trim();
// don't just regurgitate a taxa. give up if any disallowed string occurs anywhere in what is left
for ( const disallowed of disallowedList ) {
const regEx = new RegExp( regExEscape( disallowed ), 'i' );
if ( disallowed && wikicode2.match( regEx ) ) {
return '';
}
}
// Chop all words except the last word, which should be a noun
// Species of western saltwater crocodile -> Species of crocodile
// let firstWords = getFirstWords(wikicode2);
let lastWord = getLastWord( wikicode2 );
// Inflect is defined elsewhere in this file. presumably turns a plural noun into its singular
lastWord = new Inflect().singularize( lastWord );
wikicode2 = /* firstWords + */ lastWord;
// keep short description 40 characters or less, per WP:SDSHORT
// the 11 is presumably the length of the "Species of " prefix that gets added below
if ( wikicode2.length + 11 > 40 ) {
return '';
}
// return {{Short description|Species of ...}}
if ( wikicode2 ) {
return `{{Short description|Species of ${ wikicode2 }}}`;
}
// nothing was left after all the chopping
return '';
}
/**
 * Get everything except the last word.
 *
 * @param {string} wikicode2 For example "1 2 3 4"
 * @return {string} For example "1 2 3 " (the trailing space is kept)
 */
function getFirstWords( wikicode2 ) {
	const [ , everythingBeforeLastWord ] = wikicode2.match( /^(.*?)([^ ]*)$/m );
	return everythingBeforeLastWord;
}
/**
 * Get the last word, i.e. the trailing run of characters that are not spaces.
 *
 * @param {string} wikicode2 For example "1 2 3 4"
 * @return {string} For example "4"
 */
function getLastWord( wikicode2 ) {
	const [ , , finalWord ] = wikicode2.match( /^(.*?)([^ ]*)$/m );
	return finalWord;
}
/**
 * Convert =TitleHeading= to == H2Heading ==
 *
 * @param {string} wikicode
 * @return {string} Wikicode where every level 1 heading has become a level 2 heading with one space of padding
 */
function convertH1ToH2( wikicode ) {
	const h1Heading = /^= ?([^=]*?) ?=$/gm;
	return wikicode.replace( h1Heading, ( wholeHeading, headingText ) => `== ${ headingText } ==` );
}
/**
 * No more than 2 newlines (1 blank line) in a row. Except stubs, which get 3 newlines (2 blank lines).
 *
 * @param {string} wikicode
 * @return {string}
 */
function deleteMoreThanTwoEntersInARow( wikicode ) {
	return wikicode
		// a line holding nothing but spaces counts as a blank line. if not fixed, this will mess up the patterns below
		.replace( /\n +\n/g, '\n\n' )
		// delete extra enters
		.replace( /\n{3,}/gm, '\n\n' )
		// put an additional newline in front of a stub template that has a blank line above it
		.replace( /\n{2}(\{\{[^}]*stub\}\})/gi, '\n\n\n$1' );
}
/**
 * No more than 2 newlines (1 blank line) in a row, but only in the part of the page that comes
 * before the 'notesAndReferences' section. The rest of the page is left alone.
 *
 * @param {string} wikicode
 * @param {Object} mopf Object whose getSectionPosition( wikicode, sectionName ) returns the string position of a section
 * @return {string}
 */
function deleteMoreThanTwoEntersInARowBeforeReferences( wikicode, mopf ) {
	const splitPosition = mopf.getSectionPosition( wikicode, 'notesAndReferences' );
	const beforeReferences = wikicode.slice( 0, splitPosition ).replace( /\n{3,}/gm, '\n\n' );
	const referencesAndBelow = wikicode.slice( splitPosition );
	return beforeReferences + referencesAndBelow;
}
/**
 * Delete the whitespace around the name and around the sort key of category links.
 * For example, [[Category: Foo | bar ]] becomes [[Category:Foo|bar]]
 *
 * @param {string} wikicode
 * @return {string}
 */
function fixWhitespaceInCategories( wikicode ) {
	const categoryWithoutSortKey = /(\[\[:?Category:)\s*([^\]|]+?)\s*(\]\])/gi;
	const categoryWithSortKey = /(\[\[:?Category:)\s*([^\]|]+?)\s*(\|)\s*([^\]|]+?)\s*(\]\])/gi;
	// the whitespace sits between the capture groups, so gluing the groups back together drops it
	const glueGroupsTogether = ( wholeMatch, ...groupsAndExtras ) => groupsAndExtras
		.filter( ( part ) => typeof part === 'string' )
		.slice( 0, -1 )
		.join( '' );
	return wikicode
		.replace( categoryWithoutSortKey, glueGroupsTogether )
		.replace( categoryWithSortKey, glueGroupsTogether );
}
/**
 * Delete parameterless {{Authority control}} templates (or one of its aliases) that are followed by
 * a newline, but only if the page has a {{Taxonbar}} (or one of its aliases).
 *
 * @param {string} wikicode2
 * @return {string}
 */
function removeAuthorityControlIfTaxonbarPresent( wikicode2 ) {
	const taxonbarOpening = /\{\{(?:Taxonbar|Taxon-bar|Taxobar|TaxonIDs|Taxon[ _]identifiers|Taxon[ _]bar)/i;
	if ( !wikicode2.match( taxonbarOpening ) ) {
		return wikicode2;
	}
	const authorityControlLine = /\{\{(?:Authority[ _]control|Normdaten|Authoritycontrol|External[ _]identifiers|Autorité|Control[ _]de[ _]autoridades|전거[ _]통제|Normativna[ _]kontrola)\}\}\n/gi;
	return wikicode2.replace( authorityControlLine, '' );
}
/**
 * Delete every {{DEFAULTSORT:}} (or one of its aliases) that has no sort key, plus one newline after it.
 *
 * @param {string} wikicode2
 * @return {string}
 */
function removeEmptyDefaultSort( wikicode2 ) {
	const emptyDefaultSort = /\{\{(?:DEFAULTSORT|Default[ _]sort|SORTIERUNG)[:|]?\s*\}\}\n?/gi;
	return wikicode2.replace( emptyDefaultSort, '' );
}
/**
 * Delete every {{DEFAULTSORT:}} (or one of its aliases) whose sort key is exactly the page title,
 * plus one newline after it. The comparison is case insensitive.
 *
 * @param {string} wikicode2
 * @param {string} titleNoNamespaceNoUnderscores
 * @return {string}
 */
function removeDefaultSortIdenticalToTitle( wikicode2, titleNoNamespaceNoUnderscores ) {
	const escapedTitle = regExEscape( titleNoNamespaceNoUnderscores );
	const pattern = '\\{\\{(?:DEFAULTSORT|Default[ _]sort|SORTIERUNG)[:\\|]' + escapedTitle + '\\}\\}\\n?';
	return wikicode2.replace( new RegExp( pattern, 'gi' ), '' );
}
/**
 * Make sure that safelisted stub templates that are already on the page do not get lost.
 * Every safelisted stub found in the wikicode (case insensitive) is appended to newStubs,
 * unless newStubs already holds it.
 *
 * @param {string[]} newStubs Stub templates such as '{{Fish-stub}}'. This array is modified in place.
 * @param {string} wikicode2 Wikicode of the page
 * @return {string[]} The same array object that was passed in as newStubs
 */
function addSafelistedStubs( newStubs, wikicode2 ) {
	const stubSafelist = [
		// Stubs that do not 1:1 correspond with taxonomy, but that should not be deleted
		// SENTENCE CASE AND DASHES PLEASE, usually singular
		'Australia-asterid',
		'Australia-eudicot',
		'Crab',
		'Ediacaran',
		'Edicarian',
		'Fish',
		'Fruit',
		'Fruit-tree',
		'Green algae', // polyphyletic
		'Lichen',
		'Lizard', // all lizards are in squamata, but not all of squamata is lizards
		'NewZealand-plant',
		'Parasite',
		'Parasitic animal', // space instead of dash is intentional
		'Samoa',
		'Solomons',
		'Squat-lobster',
		'Tree'
	];
	stubSafelist.forEach( ( stubName ) => {
		const stubTemplate = `{{${ stubName }-stub}}`;
		const stubTemplatePattern = new RegExp( `\\{\\{${ regExEscape( stubName ) }-stub\\}\\}`, 'i' );
		const isOnPage = wikicode2.match( stubTemplatePattern ) !== null;
		const isAlreadyListed = newStubs.includes( stubTemplate );
		if ( isOnPage && !isAlreadyListed ) {
			newStubs.push( stubTemplate );
		}
	} );
	return newStubs;
}
/**
 * Strip the namespace from a page title.
 *
 * @param {string} title Page title, with or without namespace. Must be a valid title, else mw.Title.newFromText() returns null and this throws.
 * @return {string} The page name without namespace, in the form that mw.Title#getMain produces (documented as using underscores instead of spaces)
 */
function getTitleNoNamespace( title ) {
	// Do not use getName() here. It is the "file name without extension" accessor, so when the title
	// contains a period, everything after the last period is chopped (e.g. "Salix alba var. vitellina").
	// getMain() returns the complete page name.
	return mw.Title.newFromText( title ).getMain();
}
/**
 * @param {string} title
 * @param {string} wikicode2
 * @return {boolean} true if the title contains "subsp." or if the wikicode contains an {{Infraspeciesbox}} (case insensitive)
 */
function isSubSpecies( title, wikicode2 ) {
	const hasInfraSpeciesBox = Boolean( wikicode2.match( /\{\{Infraspeciesbox/i ) );
	const hasSubSpeciesInTitle = title.includes( 'subsp.' );
	return hasSubSpeciesInTitle || hasInfraSpeciesBox;
}
/**
 * Outside of draftspace, turn disabled category links ([[:Category:) into real categories ([[Category:).
 *
 * @param {string} wikicode2
 * @param {boolean} isDraft
 * @return {string} The input unchanged if isDraft is truthy
 */
function enableCategories( wikicode2, isDraft ) {
	return isDraft ?
		wikicode2 :
		wikicode2.replace( /\[\[:Category:/gi, '[[Category:' );
}
/**
 * In draftspace, turn real categories ([[Category:) into disabled category links ([[:Category:).
 *
 * @param {string} wikicode2
 * @param {boolean} isDraft
 * @return {string} The input unchanged if isDraft is falsy
 */
function disableCategories( wikicode2, isDraft ) {
	return isDraft ?
		wikicode2.replace( /\[\[Category:/gi, '[[:Category:' ) :
		wikicode2;
}
/**
 * Delete the first genus category that uses a single space as its sort key, for example
 * [[Category:Homo| ]], along with the newline after it. The match is case insensitive.
 *
 * @param {string} wikicode2
 * @param {string} genus
 * @param {string} draftCategoryColon Text expected between [[ and Category: — presumably ':' for drafts and '' otherwise. It is placed into the regex without escaping.
 * @return {string}
 */
function deleteGenusCategoryWithSpaceDisambiguator( wikicode2, genus, draftCategoryColon ) {
	const pattern = `\\[\\[${ draftCategoryColon }Category:${ regExEscape( genus ) }\\| \\]\\]\\n`;
	const genusCategoryLine = new RegExp( pattern, 'i' );
	return wikicode2.replace( genusCategoryLine, '' );
}
/**
 * Outside of draftspace, delete every template whose name starts with "Draft" (case insensitive).
 *
 * @param {string} wikicode2
 * @param {boolean} isDraft
 * @return {string} The input unchanged if isDraft is truthy
 */
function removeDraftTagIfNotDraftspace( wikicode2, isDraft ) {
	const draftTemplate = /\{\{Draft[^}]*\}\}/gi;
	return isDraft ? wikicode2 : wikicode2.replace( draftTemplate, '' );
}
/**
 * @param {string} wikicode2
 * @return {boolean} true if the wikicode contains "disambiguation" directly followed by }} or | (case insensitive), as at the end of a disambiguation template's name
 */
function isDisambiguationPage( wikicode2 ) {
	const endOfDisambiguationTemplateName = /disambiguation(?:\}\}|\|)/i;
	return wikicode2.match( endOfDisambiguationTemplateName ) !== null;
}
/**
 * Check whether the wikicode is a redirect, i.e. whether it starts with #REDIRECT followed by a wikilink.
 * The keyword is case insensitive. Leading whitespace, any amount of whitespace (including none)
 * after the keyword, and an optional colon are all accepted, for example "#REDIRECT[[Foo]]",
 * "#redirect   [[Foo]]" and "#REDIRECT: [[Foo]]".
 *
 * @param {string} wikicode2
 * @return {boolean}
 */
function isRedirectPage( wikicode2 ) {
	// No m flag, so ^ only matches at the very start of the page
	return Boolean( wikicode2.match( /^\s*#REDIRECT\s*:?\s*\[\[/i ) );
}
/**
 * Delete the generic {{Stub}} template (and one newline after it) if the page also has a more
 * specific {{...-stub}} template.
 *
 * @param {string} wikicode2
 * @return {string}
 */
function deleteStubTemplateIfAnyOtherStubsPresent( wikicode2 ) {
	const hasSpecificStub = wikicode2.match( /\{\{.+-stub\}\}/gi ) !== null;
	const hasPlainStub = wikicode2.match( /\{\{stub\}\}/gi ) !== null;
	if ( !hasSpecificStub || !hasPlainStub ) {
		return wikicode2;
	}
	return wikicode2.replace( /\{\{stub\}\}\n?/gi, '' );
}
/**
 * Read the genus and species out of a speciesbox that uses |taxon= together with |parent=.
 *
 * @param {string} wikicode2
 * @return {{taxonomyTemplateGenus: string, genusForAlert: string, species: string}|null} species is the
 * second word of |taxon=. Both genus fields hold the value of |parent=. null if |taxon= is not a two-word
 * binomial or if |parent= is missing.
 */
function getSpeciesboxTaxonAndParentParameters( wikicode2 ) {
	const taxonMatch = wikicode2.match( /\|\s*taxon\s*=\s*([A-Z][a-z]+) ([a-z]+)\s*[<\n|}]/ );
	if ( !taxonMatch ) {
		return null;
	}
	const [ , , species ] = taxonMatch;
	const parentMatch = wikicode2.match( /\|\s*parent\s*=\s*([A-Za-z ()]+?)\s*[<\n|}]/ );
	if ( !parentMatch || !species ) {
		return null;
	}
	const [ , parent ] = parentMatch;
	return {
		taxonomyTemplateGenus: parent,
		genusForAlert: parent,
		species
	};
}
});
// </nowiki>