#!/bin/bash
# Make-BHGbot7-edit-list.sh
#
# This script creates a list of category-space pages to be created as
# category redirects for [[WP:BHGbot 7]].
# Each entry in the list is the name of a category page to be created as a
# redirect to the same title, but with the "z" in "organization" replaced
# with "s", and vice versa.
#
# e.g. the page [[Category:Anti-Foobar organisations]] is to be created with
# the content {{Category redirect|Anti-Foobar organizations|bot=BHGbot}}
#
# Three input files are required in the current directory. Each is the output
# of a quarry query, saved in tab-delimited format.
#   i)   "quarry-orgcats":      the output of https://quarry.wmflabs.org/query/46899
#   ii)  "quarry-allcats":      the output of https://quarry.wmflabs.org/query/46999
#   iii) "quarry-all-articles": the output of https://quarry.wmflabs.org/query/47001
#
# There are 5 steps:
#   1. Convert the list in quarry-orgcats by replacing s with z and
#      vice-versa: output in fixed-orgcats
#   2. Safety check: remove any entries in fixed-orgcats which do not contain
#      "organi[sz]ation"
#   3. Prepare the data files for comparison
#   4. Remove from fixed-orgcats:
#        a) existing category pages
#        b) titles which exist in article space
#   5. Convert the list into wikilinked format for use by AWB
#
# Output: redirect-cats-to-create.txt (plus the intermediate files named in
# each step), all written to the current directory.
# Exit status: 0 on success, 1 if an input file is missing or a step fails.

# -e is deliberately not used: every data-processing step in main() is
# checked explicitly, and main() reports failure through its return status.
set -u -o pipefail

#######################################
# STEP 1 filter: swap the spelling of "organisation"/"organization".
# Reads titles on stdin, writes the converted titles to stdout.
# Underscores are turned into spaces first because "_" is a word character
# and would otherwise stop \b matching at the edges of the word; they are
# turned back into underscores at the end.  The "s" spelling is parked on a
# placeholder so that the z->s pass does not convert it straight back.
# NOTE: \b is a GNU sed extension.
#######################################
swap_org_spelling() {
  sed -E \
    -e 's/_/ /g' \
    -e 's/\b([oO]rgani)s(ations?)\b/\1%%%@#!%%%\2/g' \
    -e 's/\b([oO]rgani)z(ations?)\b/\1s\2/g' \
    -e 's/\b([oO]rgani)%%%@#!%%%(ations?)\b/\1z\2/g' \
    -e 's/ /_/g'
}

#######################################
# STEP 2 filter: keep only lines which contain "organi[sz]ation".
# Reads stdin, writes the surviving lines to stdout.
# Returns 0 even when nothing matched (grep status 1), because an empty
# result is not an error; returns non-zero only when grep itself failed.
#######################################
keep_org_titles() {
  grep -E '[oO]rgani[sz]ations?'
  local status=$?
  ((status <= 1))
}

#######################################
# STEP 3 filter: convert to unix format and sort.
# Reads stdin, writes to stdout.  The CR of each CR-LF pair is removed
# *before* sorting so that it never takes part in the comparison, and the
# sort is byte-wise (LC_ALL=C) so the order does not depend on the caller's
# locale and matches the order expected by the LC_ALL=C comm in main().
#######################################
normalise_and_sort() {
  tr -d '\015' | LC_ALL=C sort
}

#######################################
# STEP 5 filter: wrap each title as a numbered-list wikilink,
# e.g. "Foo" becomes "# [[:Category:Foo]]".
# Reads stdin, writes to stdout.
#######################################
wikilink_titles() {
  sed -e 's/^/# [[:Category:/' -e 's/$/]]/'
}

#######################################
# Run the five steps against the quarry files in the current directory.
# Outputs:  progress messages and line counts on stdout, errors on stderr.
# Returns:  0 on success, 1 if an input is missing or a step fails.
#######################################
main() {
  local input
  for input in quarry-orgcats quarry-allcats quarry-all-articles; do
    if [[ ! -r "$input" ]]; then
      printf 'ERROR: required input file "%s" is missing or unreadable\n' \
        "$input" >&2
      return 1
    fi
  done

  ### ==============================
  # STEP 1:
  # in quarry-orgcats, replace every instance of "organisation" with
  # "organization", and vice-versa; save the output in fixed-orgcats
  swap_org_spelling <quarry-orgcats >fixed-orgcats || return 1
  printf '%s' "created fixed-orgcats. #Lines: "
  wc -l fixed-orgcats

  ### ==============================
  # STEP 2:
  # Safety check: purge from fixed-orgcats any lines which do not contain
  # "organi[sz]ation".
  # This should remove only one line: the "page title" header from the
  # quarry output.  Any more removals indicates an error in the input data.
  printf '%s' "purging from fixed-orgcats any lines which do not contain organi[sz]ation: "
  keep_org_titles <fixed-orgcats >fixed-orgcats-purged || return 1
  printf '%s' "DONE. #Lines: "
  wc -l fixed-orgcats-purged

  ### ==============================
  # STEP 3:
  # Prepare each of the data files.  Each file needs to be:
  #   a) converted to unix format by stripping out the CR from the CR-LF pairs
  #   b) sorted to allow use of comm to compare files
  echo ""
  printf '%s' "sorting fixed-orgcats-purged: "
  normalise_and_sort <fixed-orgcats-purged >fixed-orgcats-sorted || return 1
  printf '%s' "DONE. #lines: "
  wc -l fixed-orgcats-sorted

  printf '%s' "sorting quarry-allcats: "
  normalise_and_sort <quarry-allcats >quarry-allcats-sorted || return 1
  printf '%s' "DONE. #lines: "
  wc -l quarry-allcats-sorted

  printf '%s' "sorting quarry-all-articles (may be slow): "
  normalise_and_sort <quarry-all-articles >quarry-all-articles-sorted || return 1
  printf '%s' "DONE. #lines: "
  wc -l quarry-all-articles-sorted

  ### ==============================
  # STEP 4
  # Compare the lists to remove entries which should not be created.
  # comm -23 keeps only the lines unique to the first file.
  echo ""
  printf '%s' "Removing existing category pages from the list of redirects to be created: "
  LC_ALL=C comm -23 fixed-orgcats-sorted quarry-allcats-sorted \
    >fixed-orgcats-notexist || return 1
  printf '%s' "DONE. #lines: "
  wc -l fixed-orgcats-notexist

  printf '%s' "Removing existing aricle titles from the list of categ redirects to be created: "
  LC_ALL=C comm -23 fixed-orgcats-notexist quarry-all-articles-sorted \
    >redirect-cats-to-create-bare || return 1
  printf '%s' "DONE. #lines: "
  wc -l redirect-cats-to-create-bare

  ### ==============================
  # STEP 5
  # Convert the list into wikilinked format for use by AWB
  echo ""
  printf '%s' "Wikilink the list of redirects to be created: "
  wikilink_titles <redirect-cats-to-create-bare \
    >redirect-cats-to-create.txt || return 1
  echo "DONE"

  printf '\n\n\n===== FINISHED =====\n'
  echo "Stats:"
  printf '%s' "Existing non-redirected, non-dab cats with organi[sz]ation in title: "
  wc -l fixed-orgcats-purged
  printf '%s' "Proposed redirects which don't already exist as cats: "
  wc -l fixed-orgcats-notexist
  printf '%s' "Proposed redirects which don't already exist as cats or as article titles: "
  wc -l redirect-cats-to-create-bare
  printf '\n%s\n' "List of redirects to created is at redirect-cats-to-create.txt"
  printf '%s' "Number of redirects to create: "
  wc -l redirect-cats-to-create.txt
}

# The script's exit status is main's return status.
main "$@"