Wikipedia:List of Wikipedians by number of edits/How to generate the lists

From Wikipedia, the free encyclopedia
Jump to: navigation, search

This page explains how to generate the following lists.

Preconditions[edit]

AnonymousUsers.java

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * The user names listed in the file <code>anonymous.txt</code> of the current directory.
 * <p>
 * Every non-blank line of the file names one user, either plainly
 * (<code>User:Name</code>) or as a numbered entry (<code>1. User:Name</code>).
 */
class AnonymousUsers {
	
	private Map<String, String> users = new HashMap<String, String>();
	
	private static final String INPUT_FILE_NAME = "anonymous.txt";
	
	// Decoding explicitly keeps non-ASCII user names independent of the platform default charset.
	// NOTE(review): the list file is assumed to be saved as UTF-8 - confirm.
	private static final String INPUT_FILE_ENCODING = "UTF-8";
	
	/**
	 * Loads the user names from the list file.
	 * <p>
	 * The optional numbering and the <code>User:</code> prefix are removed from every line,
	 * surrounding whitespace is dropped, and blank lines are skipped (they do not end the list).
	 * 
	 * @throws FileNotFoundException if the list file does not exist
	 * @throws IOException if the list file cannot be read
	 */
	public void initialize() throws FileNotFoundException, IOException {
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new java.io.InputStreamReader(
					new java.io.FileInputStream(INPUT_FILE_NAME), INPUT_FILE_ENCODING));
			final Pattern pattern = Pattern.compile("^\\s*\\d*\\.?\\s*User:");
			String line;
			while ((line = reader.readLine()) != null) {
				final Matcher matcher = pattern.matcher(line);
				// Names are compared exactly, so stray trailing whitespace must not survive.
				final String user = matcher.replaceFirst("").trim();
				if (user.length() == 0) {
					continue;
				}
				users.put(user, user);
			}
		} finally {
			if (reader != null) {
				reader.close();
			}
		}
	}

	/**
	 * Tells whether the given user name was listed in the file.
	 * 
	 * @param user the user name without the <code>User:</code> prefix
	 * @return <code>true</code> if the name has been loaded by {@link #initialize()}
	 */
	public boolean contains(String user) {
		return users.containsKey(user);
	}

	/**
	 * Returns the string form of the underlying map of loaded names.
	 */
	public String toString() {
		return users.toString();
	}

}

Namespaces.java

import java.util.HashMap;
import java.util.Map;

/**
 * A registry that maps namespace names (the part of a page title before the first colon)
 * to their numeric keys.
 */
class Namespaces {
	
	public static final int MAIN_NAMESPACE = 0;
	
	private static final char NAMESPACE_SEPARATOR = ':';
	
	private final Map<String, Integer> map = new HashMap<String, Integer>();
	
	/**
	 * Registers a namespace.
	 * 
	 * @param key the namespace name as it appears in front of the colon of a page title
	 * @param ns the numeric key of the namespace
	 */
	public void add(String key, int ns) {
		map.put(key, ns);
	}
	
	/**
	 * Determines the namespace of a page title.
	 * 
	 * @param text the full page title
	 * @return the numeric key registered for the text before the first colon,
	 *         or {@link #MAIN_NAMESPACE} if the title has no colon or the prefix is not registered
	 */
	public int ns(String text) {
		final int separatorIndex = text.indexOf(NAMESPACE_SEPARATOR);
		if (separatorIndex < 0) {
			return MAIN_NAMESPACE;
		}
		// substring() instead of split(): splitting a title that consists only of colons
		// yields an empty array, so indexing its first element would throw.
		final Integer ns = map.get(text.substring(0, separatorIndex));
		if (ns == null) {
			return MAIN_NAMESPACE;
		}
		return ns;
	}

}

UnflaggedBots.java

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * The bot account names listed in the file <code>unflagged-bots.txt</code> of the current directory.
 * <p>
 * Every non-blank line of the file names one account, either plainly (<code>Name</code>)
 * or as a numbered entry (<code>1. Name</code>).
 */
public class UnflaggedBots {

	private Map<String, String> users = new HashMap<String, String>();
	
	private static final String INPUT_FILE_NAME = "unflagged-bots.txt";
	
	// Decoding explicitly keeps non-ASCII account names independent of the platform default charset.
	// NOTE(review): the list file is assumed to be saved as UTF-8 - confirm.
	private static final String INPUT_FILE_ENCODING = "UTF-8";
	
	/**
	 * Loads the account names from the list file.
	 * <p>
	 * An optional list number of the form <code>N.</code> is removed from every line,
	 * surrounding whitespace is dropped, and blank lines are skipped (they do not end the list).
	 * 
	 * @throws FileNotFoundException if the list file does not exist
	 * @throws IOException if the list file cannot be read
	 */
	public void initialize() throws FileNotFoundException, IOException {
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new java.io.InputStreamReader(
					new java.io.FileInputStream(INPUT_FILE_NAME), INPUT_FILE_ENCODING));
			// Digits count as numbering only when followed by a dot; otherwise the
			// leading digits of names such as "7SeriesBOT" would be stripped.
			final Pattern pattern = Pattern.compile("^\\s*(?:\\d+\\.\\s*)?");
			String line;
			while ((line = reader.readLine()) != null) {
				final Matcher matcher = pattern.matcher(line);
				// Names are compared exactly, so stray trailing whitespace must not survive.
				final String user = matcher.replaceFirst("").trim();
				if (user.length() == 0) {
					continue;
				}
				users.put(user, user);
			}
		} finally {
			if (reader != null) {
				reader.close();
			}
		}
	}

	/**
	 * Tells whether the given account name was listed in the file.
	 * 
	 * @param user the account name
	 * @return <code>true</code> if the name has been loaded by {@link #initialize()}
	 */
	public boolean contains(String user) {
		return users.containsKey(user);
	}

	/**
	 * Returns the string form of the underlying map of loaded names.
	 */
	public String toString() {
		return users.toString();
	}

}

User.java

/**
 * Edit counters kept for one contributor: an id, a display text, and four tallies
 * (all edits, recent edits, main-namespace edits, recent main-namespace edits).
 */
class User {

	private int id;
	private String text;

	private int edits;
	private int editsInRecentDays;
	private int editsMain;
	private int editsMainInRecentDays;

	/** Creates a contributor with id 0, no text and all counters at zero. */
	public User() {
		this(0, null);
	}

	/**
	 * Creates a contributor with all counters at zero.
	 * 
	 * @param id the contributor id
	 * @param text the contributor text
	 */
	public User(int id, String text) {
		this.id = id;
		this.text = text;
	}

	public int getId() {
		return this.id;
	}

	public void setId(int id) {
		this.id = id;
	}

	public String getText() {
		return this.text;
	}

	public void setText(String text) {
		this.text = text;
	}

	/** Tells whether the id is 0. */
	public boolean isIpAddress() {
		return 0 == this.id;
	}

	public int getEdits() {
		return this.edits;
	}

	public int getEditsInRecentDays() {
		return this.editsInRecentDays;
	}

	public int getEditsMain() {
		return this.editsMain;
	}

	public int getEditsMainInRecentDays() {
		return this.editsMainInRecentDays;
	}

	/** Adds one to the total edit counter. */
	public void incrementEdits() {
		this.edits += 1;
	}

	/** Adds one to the recent edit counter. */
	public void incrementEditsInRecentDays() {
		this.editsInRecentDays += 1;
	}

	/** Adds one to the main-namespace edit counter. */
	public void incrementEditsMain() {
		this.editsMain += 1;
	}

	/** Adds one to the recent main-namespace edit counter. */
	public void incrementEditsMainInRecentDays() {
		this.editsMainInRecentDays += 1;
	}

	/** Lists the id, the text and the two all-namespace counters. */
	public String toString() {
		final StringBuilder description = new StringBuilder();
		description.append("id: ").append(this.id);
		description.append(", text: ").append(this.text);
		description.append(", edits: ").append(this.edits);
		description.append(", editsRecentDays: ").append(this.editsInRecentDays);
		return description.toString();
	}

}

UserGroups.java

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

/**
 * The "sysop" and "bot" memberships read from a gzip-compressed <code>user_groups</code> SQL dump.
 */
class UserGroups {
	
	public static final String SYSOP = "sysop";
	public static final String BOT = "bot";
	public static final String FILE_NAME_SUFFIX = "user_groups.sql.gz";
	
	// Compiled once; these do not change from line to line.
	private static final Pattern LINE_START_PATTERN = Pattern.compile("^INSERT INTO `user_groups` VALUES \\(");
	private static final Pattern LINE_END_PATTERN = Pattern.compile("\\);$");
	private static final Pattern TUPLE_SEPARATOR_PATTERN = Pattern.compile("\\),\\(");
	
	private final Map<Integer, Integer> sysops = new HashMap<Integer, Integer>();
	private final Map<Integer, Integer> bots = new HashMap<Integer, Integer>();
	
	/**
	 * Reads the memberships from a dump.
	 * <p>
	 * Only lines starting with <code>INSERT INTO `user_groups` VALUES (</code> are used. Of every
	 * value tuple the first field is taken as the user id and the second as the quoted group name;
	 * any further fields are ignored. The stream is not closed here; that is left to the caller.
	 * 
	 * @param inputStream the gzip-compressed dump
	 * @throws IOException if the stream cannot be read or decompressed,
	 *         or if a value tuple has no numeric user id or no group field
	 */
	public void initialize(InputStream inputStream) throws IOException {
		// The charset is named explicitly so that decoding does not depend on the platform default.
		// NOTE(review): the dump is assumed to be UTF-8 - confirm.
		final BufferedReader reader
				= new BufferedReader(new InputStreamReader(new GZIPInputStream(inputStream), "UTF-8"));
		String line;
		while ((line = reader.readLine()) != null) {
			if (!LINE_START_PATTERN.matcher(line).find()) {
				continue;
			}
			String tuples = LINE_START_PATTERN.matcher(line).replaceFirst("");
			tuples = LINE_END_PATTERN.matcher(tuples).replaceFirst("");
			for (String userGroupString : TUPLE_SEPARATOR_PATTERN.split(tuples)) {
				addTuple(userGroupString);
			}
		}
	}
	
	/**
	 * Records one <code>id,'group'[,...]</code> tuple if its group is "sysop" or "bot".
	 */
	private void addTuple(String userGroupString) throws IOException {
		try {
			final StringTokenizer userGroupTokenizer = new StringTokenizer(userGroupString, ",");
			final int user = Integer.parseInt(userGroupTokenizer.nextToken());
			final String group = userGroupTokenizer.nextToken();
			if (group.equals("'" + SYSOP + "'")) {
				sysops.put(user, user);
			} else if (group.equals("'" + BOT + "'")) {
				bots.put(user, user);
			}
		} catch (NumberFormatException e) {
			// Reported as an I/O problem of the dump: callers must not mistake
			// it for a number format problem of their own input.
			throw malformedTuple(userGroupString, e);
		} catch (java.util.NoSuchElementException e) {
			throw malformedTuple(userGroupString, e);
		}
	}
	
	private static IOException malformedTuple(String userGroupString, RuntimeException cause) {
		final IOException exception = new IOException("Malformed user_groups tuple: " + userGroupString);
		exception.initCause(cause);
		return exception;
	}
		
	/**
	 * Returns the group of a user.
	 * 
	 * @param user the user id
	 * @return {@link #SYSOP} if the user is a sysop (even if also a bot),
	 *         otherwise {@link #BOT} if the user is a bot, otherwise the empty string
	 */
	public String group(int user) {
		if (sysops.containsKey(user)) {
			return SYSOP;
		} else if (bots.containsKey(user)) {
			return BOT;
		} else {
			return "";
		}
	}

}

WikipediansByNumberOfEdits.java

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.EmptyStackException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.TimeZone;
import java.util.zip.GZIPInputStream;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Base class of the command-line programs that count the edits of every contributor
 * in a gzip-compressed XML history dump and print the result as ranked wiki tables.
 * <p>
 * Subclasses supply the wiki name and the printers; {@link #execute(String[])} does the rest.
 * The counted period is taken from the system properties <code>begin.date</code> and
 * <code>end.date</code> (format <code>yyyy-MM-dd</code>, interpreted as UTC days), and the
 * number of ranks to print from the system property <code>limit</code> (default 5000).
 */
public abstract class WikipediansByNumberOfEdits {
	
	private static final String YEARMONTH_FORMAT_STRING = "yyyy-MM";
	private static final String DATE_FORMAT_STRING = YEARMONTH_FORMAT_STRING + "-dd";
	private static final String TIME_FORMAT_STRING = "HH:mm:ss";
	
	/**
	 * Parses and formats <code>yyyy-MM-dd</code> dates as UTC days, matching the UTC
	 * timestamps of the dump and the "(UTC)" label printed next to the period.
	 */
	public static final DateFormat DATE_FORMAT = createUtcDateFormat(DATE_FORMAT_STRING);
	
	private final Date dateStarted =  new Date();
	
	private static final String LIMIT_PROPERTY_KEY = "limit";
	private int limit = 0;
	
	private static DateFormat createUtcDateFormat(String pattern) {
		final SimpleDateFormat format = new SimpleDateFormat(pattern);
		format.setTimeZone(TimeZone.getTimeZone("UTC"));
		return format;
	}
	
	private static Calendar createUtcCalendar() {
		return Calendar.getInstance(TimeZone.getTimeZone("UTC"));
	}
	
	/**
	 * Runs the program: reads the user groups, parses the dump and lets every printer
	 * write its table to standard output (UTF-8). Progress and errors go to standard error;
	 * the process exits with status 1 on invalid arguments or on failure.
	 * 
	 * @param args the dump file name followed by the user groups file name
	 */
	protected void execute(String[] args) {
		
		try {
			final int VALID_ARGUMENT_LENGTH = 2;
			if (args.length < VALID_ARGUMENT_LENGTH) {
				printUsage();
				System.exit(1);
			}
			System.err.println("Started. " + dateStarted);
			String limitText = System.getProperty(LIMIT_PROPERTY_KEY, "5000");
			limit = Integer.parseInt(limitText);
			final File dumpFile = new File(args[0]);
			fileNameCheck(dumpFile);
			final File userGroupsFile = new File(args[1]); 
			fileNameCheck(userGroupsFile);
			final PrintWriter writer = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"));
			final UserGroups userGroups = new UserGroups();
			InputStream userGroupsInputStream = null;
			try {
				// Kept in the variable so that the finally block really closes the file.
				userGroupsInputStream = new FileInputStream(userGroupsFile);
				userGroups.initialize(userGroupsInputStream);
			} finally {
				if (userGroupsInputStream != null) {
					try {
						userGroupsInputStream.close();
					} catch (IOException e) {
						e.printStackTrace();
					}
				}
			}
			final DumpHandler dumpHandler = new DumpHandler();
			dumpHandler.setIpAddressesAreToBeCounted(getIpAddressesAreToBeCounted());
			InputStream dumpInputStream = null;
			try {
				dumpInputStream = new GZIPInputStream(new FileInputStream(dumpFile));
				SAXParserFactory.newInstance().newSAXParser().parse(dumpInputStream, dumpHandler);
			} finally {
				if (dumpInputStream != null) {
					try {
						dumpInputStream.close();
					} catch (IOException e) {
						e.printStackTrace();
					}
				}
			}
			final WikipediansPrinter[] printers = createPrinters();
			for (WikipediansPrinter printer : printers) {
				printer.setWriter(writer);
				printer.setBeginTimestamp(dumpHandler.getBeginTimestamp());
				printer.setEndTimestamp(dumpHandler.getEndTimestamp());
				printer.setTotalEdits(dumpHandler.getRevisionCounter());
				printer.setTotalEditsInPeriod(dumpHandler.getRevisionInPeriodCounter());
				printer.print(dumpHandler.getUsers(), userGroups, limit);
				// A blank line separates the tables of consecutive printers.
				if (!printer.equals(printers[printers.length - 1])) {
					writer.println();
				}
			}
		} catch (NumberFormatException e) {
			System.err.println("The specified system property \"" + LIMIT_PROPERTY_KEY + "\" is not a valid integer.");
			System.err.println(e);
			System.exit(1);
		} catch (FileNotFoundException e) {
			System.err.println(e);
			System.exit(1);
		} catch (ParserConfigurationException e) {
			e.printStackTrace();
			System.exit(1);
		} catch (SAXException e) {
			// A ParseException cause means a missing or invalid date property;
			// the message alone is enough in that case.
			if (e.getCause() instanceof ParseException) {
				System.err.println(e);
			} else {
				e.printStackTrace();
			}
			System.exit(1);
		} catch (IOException e) {
			e.printStackTrace();
			System.exit(1);
		} finally {
			final Date dateEnded = new Date();
			System.err.println("Ended. " + dateEnded);
			// The elapsed milliseconds are formatted as a UTC time of day to obtain HH:mm:ss.
			final DateFormat dateFormat = createUtcDateFormat(TIME_FORMAT_STRING);
			System.err.println("Elapsed: " + dateFormat.format(new Date(dateEnded.getTime() - dateStarted.getTime())));
		}

	}
	
	/** Prints an example command line to standard error. */
	private void printUsage() {
		System.err.print("Usage (example): java -Xmx500m -Dbegin.date=2008-04-01 -Dend.date=2008-04-30 -Dlimit=5000");
		System.err.print(" " + getClass().getName());
		System.err.print(" " + getWikiName() + "-20080501-stub-meta-history.xml.gz");
		System.err.print(" " + getWikiName() + "-20080501-" + UserGroups.FILE_NAME_SUFFIX);
		System.err.print(" > result.txt");
		System.err.println();
	}
	
	/**
	 * Warns on standard error, and pauses for five seconds, if the file name
	 * does not start with the wiki name.
	 */
	private void fileNameCheck(File file) {
		if (!file.getName().startsWith(getWikiName())) {
			System.err.println("WARNING: The specified file name '" + file.getName() + "' does not start with '" + getWikiName() + "'.");
			try {
				Thread.sleep(5000);
			} catch(InterruptedException e) {
				// The pause is only a courtesy; keep the interrupt visible to the caller.
				Thread.currentThread().interrupt();
			}
		}
	}
	
	/** Returns the wiki name that the input file names are expected to start with. */
	protected abstract String getWikiName();
	
	/** Creates the printers, each of which writes one table. */
	protected abstract WikipediansPrinter[] createPrinters();
	
	/**
	 * Tells whether contributors with the id 0 get an entry.
	 * 
	 * @return <code>true</code> unless overridden
	 */
	protected boolean getIpAddressesAreToBeCounted() {
		return true;
	}
	
	/**
	 * SAX handler that collects the namespaces and counts the revisions of every contributor.
	 */
	private static class DumpHandler extends DefaultHandler {
		
		private static final DateFormat TIMESTAMP_DUMP_FORMAT
								= new SimpleDateFormat(DATE_FORMAT_STRING + "'T'" + TIME_FORMAT_STRING + "'Z'z");
		
		private static final String BEGIN_DATE_PROPERTY_KEY = "begin.date";
		private static final String END_DATE_PROPERTY_KEY = "end.date";
		
		private final Namespaces namespaces = new Namespaces(); 
		
		/** Names of the currently open elements, innermost on top. */
		private final Stack<String> elementStack = new Stack<String>();
		
		private Date beginTimestamp = null;
		private Date endTimestamp = null;
		
		private boolean ipAddressesAreToBeCounted = true;
		
		private int editsInLastMonth = 0;
		private final Calendar beginCalendar = createUtcCalendar();
		/** Reused for every revision to compare its year and month with the begin date. */
		private final Calendar revisionCalendar = createUtcCalendar();
		private final Set<String> usersEditedInLastMonth = new HashSet<String>();
		
		private int revisionCounter = 0;
		private int revisionInPeriodCounter = 0;
		
		// State of the <namespace> element being read.
		private int ns = 0;
		private String namespace = "";
		
		// State of the <page> being read.
		private String pageTitle = "";
		
		// State of the <revision> being read.
		private int userId = 0;
		private String userIdString = "";
		private String userText = "";
		private Date timestamp = null;
		private String timestampString = "";
		private boolean ignoreRevision = false;
		
		/** The contributors seen so far, keyed by contributor text. */
		private final Map<String, User> map = new HashMap<String, User>();
		
		// Diagnostic counters; currently not reported.
		private int timestampParseExceptionCount = 0;
		private int userIdErrorCount = 0;
		
		public Date getBeginTimestamp() {
			return beginTimestamp;
		}

		public Date getEndTimestamp() {
			return endTimestamp;
		}
		
		public void setIpAddressesAreToBeCounted(boolean ipAddressesAreToBeCounted) {
			this.ipAddressesAreToBeCounted = ipAddressesAreToBeCounted;
		}
		
		/** Returns the number of revisions that were processed (ignored ones excluded). */
		int getRevisionCounter() {
			return revisionCounter;
		}
		
		/** Returns the number of processed revisions whose timestamp lies in the period. */
		int getRevisionInPeriodCounter() {
			return revisionInPeriodCounter;
		}
		
		/** Returns a new array with all contributors seen so far. */
		public User[] getUsers() {
			return map.values().toArray(new User[map.size()]);
		}

		/**
		 * Reads the period from the system properties: it begins at 00:00:00 of the
		 * begin date and ends at 23:59:59 of the end date.
		 * 
		 * @throws SAXException wrapping a {@link ParseException} if a date property is missing or invalid
		 */
		public void startDocument() throws SAXException {
			beginTimestamp = getDateProperty(BEGIN_DATE_PROPERTY_KEY);
			final Calendar endTimestampCalendar = createUtcCalendar();
			endTimestampCalendar.setTime(getDateProperty(END_DATE_PROPERTY_KEY));
			endTimestampCalendar.add(Calendar.HOUR, 23);
			endTimestampCalendar.add(Calendar.MINUTE, 59);
			endTimestampCalendar.add(Calendar.SECOND, 59);
			endTimestamp = endTimestampCalendar.getTime();
			beginCalendar.setTime(beginTimestamp);
		}
		
		/** Prints the summary of the month of the begin date to standard error. */
		public void endDocument() throws SAXException {
			System.err.println("Processed: " + revisionCounter);
			System.err.println("As of the last month"
								+ " (" + createUtcDateFormat(YEARMONTH_FORMAT_STRING).format(beginTimestamp) + "),"
								+ " the Wikipedia received "
								+ (int)(editsInLastMonth / beginCalendar.getActualMaximum(Calendar.DATE))
								+ " edits a day.");
			System.err.println(usersEditedInLastMonth.size()
								+ " registered people (including bots) edited the Wikipedia in that month.");
//			System.err.println("Timestamp ParseException: " + timestampParseExceptionCount + " occured.");
//			System.err.println("User ID error: " + userIdErrorCount + " occured.");
			System.err.flush();
		}
		
		private static Date getDateProperty(String key) throws SAXException {
			String property = System.getProperty(key);
			try {
				if (property == null) {
					// Reported like an unparsable date instead of failing with a NullPointerException.
					throw new ParseException("The system property \"" + key + "\" is not specified.", 0);
				}
				return DATE_FORMAT.parse(property);
			} catch (ParseException e) {
				throw new SAXException(e);
			}
		}
		
		/**
		 * Pushes the element name and, for a <code>namespace</code> element, reads its numeric key.
		 * 
		 * @throws SAXException if the <code>key</code> attribute of a namespace is not an integer
		 */
		public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
			String name = localName.equals("") ? qName : localName;
			elementStack.push(name);
			if (name.equals("namespace")) {
				String key = "";
				try {
					key = atts.getValue("key");
					ns = Integer.parseInt(key);
				} catch (NumberFormatException e) {
					throw new SAXException("ns: " + key, e);
				}
			}
		}
		
		/**
		 * Pops the element name and finishes the namespace, page, timestamp
		 * or revision that has just ended.
		 */
		public void endElement(String uri, String localName, String qName) throws SAXException {
			final String name = elementStack.pop();
			if (name.equals("namespace")) {
				namespaces.add(namespace, ns);
				ns = 0;
				namespace = "";
			} else if (name.equals("page")) {
				pageTitle = "";
			} else if (name.equals("timestamp")) {
				ignoreRevision = false;
				try {
					timestamp = TIMESTAMP_DUMP_FORMAT.parse(timestampString + "UTC");
				} catch (ParseException e) {
					timestampParseExceptionCount++;
					ignoreRevision = true;
				} finally {
					// Cleared even after a failure; otherwise the next timestamp
					// would be appended to the unparsable text.
					timestampString = "";
				}
			} else if (name.equals("revision")) {
				try {
					countRevision();
				} finally {
					// Reset for ignored revisions as well; otherwise the contributor of
					// the next revision would be appended to the stale values.
					userId = 0;
					userIdString = "";
					userText = "";
					timestamp = null;
					ignoreRevision = false;
				}
			}
		}
		
		/** Adds the revision that has just ended to the counters, unless it has to be ignored. */
		private void countRevision() {
			if (!userIdString.equals("")) {
				try {
					userId = Integer.parseInt(userIdString);
				} catch (NumberFormatException e) {
					ignoreRevision = true;
				}
			}
			// A revision without any timestamp cannot be placed in time either.
			if (ignoreRevision || timestamp == null) {
				return;
			}
			final boolean inPeriod = timestampIsInPeriod(timestamp);
			User user = null;
			if (ipAddressesAreToBeCounted || userId != 0) {
				user = map.get(userText);
				if (user == null) {
					user = new User(userId, userText);
					map.put(userText, user);
				}
				// The largest id seen for a contributor text wins.
				if (user.getId() < userId) {
					user.setId(userId);
				}
				if (user.getId() != userId) {
					userIdErrorCount++;
				}
				// Edits made after the end of the period are not counted at all.
				if (timestampBeroreOrEquals(timestamp)) {
					user.incrementEdits();
					if (inPeriod) {
						user.incrementEditsInRecentDays();
					}
					if (namespaces.ns(pageTitle) == Namespaces.MAIN_NAMESPACE) {
						user.incrementEditsMain();
						if (inPeriod) {
							user.incrementEditsMainInRecentDays();
						}
					}
				}
			}
			revisionCalendar.setTime(timestamp);
			if (revisionCalendar.get(Calendar.YEAR) == beginCalendar.get(Calendar.YEAR)
					&& revisionCalendar.get(Calendar.MONTH) == beginCalendar.get(Calendar.MONTH)) {
				editsInLastMonth ++;
				if (user != null) {
					usersEditedInLastMonth.add(user.getText());
				}
			}
			if (inPeriod) {
				revisionInPeriodCounter ++;
			}
			revisionCounter++;
			final int LOG_INTERVAL = 10000;
			if (revisionCounter % LOG_INTERVAL == 0) {
				System.err.println("Processed: " + revisionCounter);
			}
		}
		
		private boolean timestampIsInPeriod(Date timestamp) {
			return ( timestamp.equals(beginTimestamp) || timestamp.after(beginTimestamp) )
					&& timestampBeroreOrEquals(timestamp);
		}
		
		private boolean timestampBeroreOrEquals(Date timestamp) {
			return ( timestamp.before(endTimestamp) || timestamp.equals(endTimestamp) );
		}
		
		/**
		 * Appends the text to the value of the element being read. Text may arrive in
		 * several pieces, hence the values are accumulated.
		 */
		public void characters (char[] ch, int start, int length) {
			// Text is only of interest inside an element that has a parent element.
			if (elementStack.size() < 2) {
				return;
			}
			final String elementName = elementStack.peek();
			final String parentElementName = elementStack.elementAt(elementStack.size() - 2);
			final String string = new String(ch, start, length);
			if (elementName.equals("namespace")) {
				namespace += string;
			}
			if (elementName.equals("title")) {
				pageTitle += string;
			}
			if (elementName.equals("timestamp")) {
				timestampString += string;
			} else if (parentElementName.equals("contributor")) {
				if (elementName.equals("id")) {
					userIdString += string;
				} else if (elementName.equals("username")) {
					userText += string;
				} else if (userText.equals("") && elementName.equals("ip")) {
					userId = 0;
					userText += string;
				}
			}
		}

	}

}

WikipediansByNumberOfEdits_en.java

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Comparator;

/**
 * Command-line program for the "enwiki" list that is ordered by the total number of edits.
 */
public class WikipediansByNumberOfEdits_en extends WikipediansByNumberOfEdits {

	private static AnonymousUsers ANONYMOUS_USERS = null;
	
	private static UnflaggedBots UNFLAGGED_BOTS = null;
	
	/**
	 * Program entry point: loads both name lists, then runs the generator.
	 * Prints the stack trace and exits with status 1 if a list cannot be read.
	 * 
	 * @param args command-line arguments
	 */
	public static void main(String[] args) {
		ANONYMOUS_USERS = new AnonymousUsers();
		UNFLAGGED_BOTS = new UnflaggedBots();
		try {
			ANONYMOUS_USERS.initialize();
			UNFLAGGED_BOTS.initialize();
			new WikipediansByNumberOfEdits_en().execute(args);
		} catch (IOException e) {
			// Also covers FileNotFoundException, which is handled the same way.
			e.printStackTrace();
			System.exit(1);
		}
	}
	
	protected String getWikiName() {
		return "enwiki";
	}
	
	protected boolean getIpAddressesAreToBeCounted() {
		return false;
	}
	
	/** Creates the single printer, wired to both name lists. */
	protected WikipediansPrinter[] createPrinters() {
		final Printer tablePrinter = new Printer();
		tablePrinter.setAnonymousUsers(ANONYMOUS_USERS);
		tablePrinter.setUnflaggedBots(UNFLAGGED_BOTS);
		return new WikipediansPrinter[]{tablePrinter};
	}
	
	/** Orders by total edits, then by recent edits, both descending. */
	private static class TotalEditsDescending implements Comparator<User> {
		
		public int compare(User user1, User user2) {
			final int byTotalEdits = user2.getEdits() - user1.getEdits();
			if (byTotalEdits != 0) {
				return byTotalEdits;
			}
			return user2.getEditsInRecentDays() - user1.getEditsInRecentDays();
		}
		
	}
	
	/** Prints the sortable table with the columns rank, user, edits and recent edits. */
	private static class Printer extends WikipediansPrinter {
		
		private AnonymousUsers anonymousUsers = null;
		
		private UnflaggedBots unflaggedBots = null;
		
		public void setAnonymousUsers(AnonymousUsers anonymousUsers) {
			this.anonymousUsers = anonymousUsers;
		}
		
		public void setUnflaggedBots(UnflaggedBots unflaggedBots) {
			this.unflaggedBots = unflaggedBots;
		}
		
		protected String getTableHeader() {
			return "Rank !! User !! Edits !! Edits in the past 30 days";
		}
		
		protected String getSpecialText() {
			return "Special";
		}
		
		protected String getUserText() {
			return "User";
		}
		
		protected String getSortable() {
			return SORTABLE;
		}
		
		protected int getTargetEdits(User user) {
			return user.getEdits();
		}
		
		protected int getTargetTotalEdits() {
			return getTotalEdits();
		}
		
		/** Replaces the text of a user found in the anonymous list by "Place holder". */
		protected void processAnonymous(User user) {
			final boolean listed = anonymousUsers.contains(user.getText());
			if (listed) {
				user.setText("Place holder");
			}
		}
		
		/** Treats a user without a group as a bot if the unflagged bot list contains the user. */
		protected String getGroup(User user, String group) {
			final boolean unflaggedBot = group.equals("") && unflaggedBots.contains(user.getText());
			return unflaggedBot ? UserGroups.BOT : group;
		}
		
		protected Comparator<User> createComparator() {
			return new TotalEditsDescending();
		}

		/** Writes the two edit columns: total edits, then recent edits. */
		protected void printEdits(User user) {
			getWriter().print(" || " + user.getEdits() + " || " + user.getEditsInRecentDays());
		}

	}
	
}

WikipediansByNumberOfRecentEdits_en.java

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Comparator;

/**
 * Command-line program for the "enwiki" list that is ordered by the number of recent edits.
 */
public class WikipediansByNumberOfRecentEdits_en extends WikipediansByNumberOfEdits {

	private static AnonymousUsers ANONYMOUS_USERS = null;
	
	/**
	 * Program entry point: loads the anonymous user list, then runs the generator.
	 * Prints the stack trace and exits with status 1 if the list cannot be read.
	 * 
	 * @param args command-line arguments
	 */
	public static void main(String[] args) {
		ANONYMOUS_USERS = new AnonymousUsers();
		try {
			ANONYMOUS_USERS.initialize();
			new WikipediansByNumberOfRecentEdits_en().execute(args);
		} catch (IOException e) {
			// Also covers FileNotFoundException, which is handled the same way.
			e.printStackTrace();
			System.exit(1);
		}
	}
	
	protected String getWikiName() {
		return "enwiki";
	}
	
	protected boolean getIpAddressesAreToBeCounted() {
		return false;
	}
	
	/** Creates the single printer, wired to the anonymous user list. */
	protected WikipediansPrinter[] createPrinters() {
		final Printer tablePrinter = new Printer();
		tablePrinter.setAnonymousUsers(ANONYMOUS_USERS);
		return new WikipediansPrinter[]{tablePrinter};
	}
	
	/** Orders by recent edits, then by total edits, both descending. */
	private static class RecentEditsDescending implements Comparator<User> {
		
		public int compare(User user1, User user2) {
			final int byRecentEdits = user2.getEditsInRecentDays() - user1.getEditsInRecentDays();
			if (byRecentEdits != 0) {
				return byRecentEdits;
			}
			return user2.getEdits() - user1.getEdits();
		}
		
	}
	
	/** Prints the period line and the sortable table ranked by recent edits. */
	private static class Printer extends WikipediansPrinter {
		
		private AnonymousUsers anonymousUsers = null;
		
		public void setAnonymousUsers(AnonymousUsers anonymousUsers) {
			this.anonymousUsers = anonymousUsers;
		}
		
		protected String getTableHeader() {
			return "Rank !! User !! Total Edits !! Recent Edits";
		}
		
		protected String getSpecialText() {
			return "Special";
		}
		
		protected String getUserText() {
			return "User";
		}
		
		protected String getSortable() {
			return SORTABLE;
		}
		
		protected int getTargetEdits(User user) {
			return user.getEditsInRecentDays();
		}
		
		/** Writes the period line followed by an empty line. */
		protected void printHeader() {
			final String begin = DATE_FORMAT.format(getBeginTimestamp());
			final String end = DATE_FORMAT.format(getEndTimestamp());
			getWriter().print("Period: " + begin + " &mdash; " + end + " (UTC)");
			getWriter().println();
			getWriter().println();
		}
		
		/** Replaces the text of a user found in the anonymous list by "Place holder". */
		protected void processAnonymous(User user) {
			final boolean listed = anonymousUsers.contains(user.getText());
			if (listed) {
				user.setText("Place holder");
			}
		}
		
		protected Comparator<User> createComparator() {
			return new RecentEditsDescending();
		}

		/** Writes the two edit columns: total edits, then recent edits. */
		protected void printEdits(User user) {
			getWriter().print(" || " + user.getEdits() + " || " + user.getEditsInRecentDays());
		}
	}
	
}

WikipediansByNumberOfRecentEdits_ja.java

import java.util.Comparator;

/**
 * Command-line program for the "jawiki" lists ordered by the number of recent edits:
 * one table for the main namespace followed by one for all namespaces.
 */
public class WikipediansByNumberOfRecentEdits_ja extends WikipediansByNumberOfEdits {
	
	/**
	 * Program entry point.
	 * 
	 * @param args command-line arguments
	 */
	public static void main(String[] args) {
		final WikipediansByNumberOfRecentEdits_ja program = new WikipediansByNumberOfRecentEdits_ja();
		program.execute(args);
	}
	
	protected String getWikiName() {
		return "jawiki";
	}
	
	/** Creates the main-namespace printer followed by the all-namespace printer. */
	protected WikipediansPrinter[] createPrinters() {
		final WikipediansPrinter[] printers = new WikipediansPrinter[2];
		printers[0] = new MainNamespacePrinter();
		printers[1] = new AllNamespacePrinter();
		return printers;
	}
	
	/** The texts and the section header shared by both tables. */
	private static abstract class Printer extends WikipediansPrinter {
		
		/** Returns the title of the section heading printed above the table. */
		protected abstract String getSectionTitle();
		
		protected String getTableHeader() {
			return "順位 !! 利用者 !! 編集回数 !! 総編集回数";
		}
		
		protected String getSpecialText() {
			return "特別";
		}
		
		protected String getUserText() {
			return "利用者";
		}
				
		protected String getSortable() {
			return SORTABLE;
		}
		
		/** Writes the section heading and the period line, followed by an empty line. */
		protected void printHeader() {
			final String heading = "== " + getSectionTitle() + " ==\n";
			final String begin = DATE_FORMAT.format(getBeginTimestamp());
			final String end = DATE_FORMAT.format(getEndTimestamp());
			getWriter().print(heading);
			getWriter().print("期間: " + begin + " &mdash; " + end + " (UTC)");
			getWriter().println();
			getWriter().println();
		}
		
	}
	
	/** Orders by recent main-namespace edits, then by all main-namespace edits, both descending. */
	private static class MainNamespaceEditsDescending implements Comparator<User> {
		
		public int compare(User user1, User user2) {
			final int byRecentEdits = user2.getEditsMainInRecentDays() - user1.getEditsMainInRecentDays();
			if (byRecentEdits != 0) {
				return byRecentEdits;
			}
			return user2.getEditsMain() - user1.getEditsMain();
		}
		
	}
	
	/** Orders by recent edits, then by all edits, both descending. */
	private static class AllNamespaceEditsDescending implements Comparator<User> {
		
		public int compare(User user1, User user2) {
			final int byRecentEdits = user2.getEditsInRecentDays() - user1.getEditsInRecentDays();
			if (byRecentEdits != 0) {
				return byRecentEdits;
			}
			return user2.getEdits() - user1.getEdits();
		}
		
	}
	
	/** The table that counts main-namespace edits only. */
	private static class MainNamespacePrinter extends Printer {
		
		public String getSectionTitle() {
			return "記事名前空間";
		}
		
		protected int getTargetEdits(User user) {
			return user.getEditsMainInRecentDays();
		}
		
		protected Comparator<User> createComparator() {
			return new MainNamespaceEditsDescending();
		}
		
		/** Writes the two edit columns: recent edits, then all edits (main namespace). */
		protected void printEdits(User user) {
			getWriter().print(" || " + user.getEditsMainInRecentDays() + " || " + user.getEditsMain());
		}

	}
	
	/** The table that counts the edits of all namespaces. */
	private static class AllNamespacePrinter extends Printer {
		
		public String getSectionTitle() {
			return "全名前空間";
		}
		
		protected int getTargetEdits(User user) {
			return user.getEditsInRecentDays();
		}
		
		protected Comparator<User> createComparator() {
			return new AllNamespaceEditsDescending();
		}
		
		/** Writes the two edit columns: recent edits, then all edits. */
		protected void printEdits(User user) {
			getWriter().print(" || " + user.getEditsInRecentDays() + " || " + user.getEdits());
		}

	}
	
}

WikipediansByNumberOfRecentEdits_zh.java

import java.util.Comparator;

public class WikipediansByNumberOfRecentEdits_zh extends WikipediansByNumberOfEdits {
	/**
	 * The main() method for this application.
	 * @param args command-line arguments
	 */
	public static void main(String[] args) {
		new WikipediansByNumberOfRecentEdits_zh().execute(args);
	}
	
	protected String getWikiName() {
		return "zhwiki";
	}
	
	protected boolean getIpAddressesAreToBeCounted() {
		return true;
	}
	
	protected WikipediansPrinter[] createPrinters() {
		final Printer printer = new Printer();
		return new WikipediansPrinter[]{printer};
	}
	
	private static class Printer extends WikipediansPrinter {
		
		protected int getTargetEdits(User user) {
			return user.getEditsInRecentDays();
		}
		
		protected String getTableHeader() {
			return "名次 !! 用户 !! 最近编辑次数 !! 累积编辑次数";
		}
		
		protected String getSpecialText() {
			return "Special";
		}
		
		protected String getUserText() {
			return "User";
		}
				
		protected String getSortable() {
			return SORTABLE;
		}
		
		protected void printHeader() {
			getWriter().print("期间: "
					+ DATE_FORMAT.format(getBeginTimestamp())
					+ " &mdash; "
					+ DATE_FORMAT.format(getEndTimestamp())
					+ " (UTC)");
			getWriter().println();
			getWriter().println();
		}
		
		protected Comparator<User> createComparator() {
			return new Comparator<User> {
				public int compare(User user1, User user2) {
					if (user1.getEditsInRecentDays() != user2.getEditsInRecentDays()) {
						return user2.getEditsInRecentDays() - user1.getEditsInRecentDays();
					} else {
						return user2.getEdits() - user1.getEdits(); 
					}
				}
			};
		}

		protected void printEdits(User user) {
			getWriter().print(" || " + user.getEditsInRecentDays());
			getWriter().print(" || " + user.getEdits());
		}
		
	}
	
}

WikipediansPrinter.java

import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;

/**
 * Prints contributors as a ranked wiki table and reports summary statistics on standard error.
 * <p>
 * Subclasses choose the ranked edit counter, the ordering, the column texts and the edit columns.
 */
abstract class WikipediansPrinter {
	
	/** The value for {@link #getSortable()} that makes the table sortable. */
	protected final String SORTABLE = " sortable";
	
	private PrintWriter writer = null;
	
	private Date beginTimestamp = null;
	private Date endTimestamp = null;
	
	private int totalEdits = 0;
	private int totalEditsInPeriod = 0;
	
	public PrintWriter getWriter() {
		return writer;
	}

	public void setWriter(PrintWriter writer) {
		this.writer = writer;
	}
	
	public Date getBeginTimestamp() {
		return beginTimestamp;
	}

	public void setBeginTimestamp(Date beginTimestamp) {
		this.beginTimestamp = beginTimestamp;
	}

	public Date getEndTimestamp() {
		return endTimestamp;
	}

	public void setEndTimestamp(Date endTimestamp) {
		this.endTimestamp = endTimestamp;
	}

	public void setTotalEdits(int totalEdits) {
		this.totalEdits = totalEdits;
	}

	public int getTotalEdits() {
		return totalEdits;
	}

	public void setTotalEditsInPeriod(int totalEditsInPeriod) {
		this.totalEditsInPeriod = totalEditsInPeriod;
	}
	
	/**
	 * Returns the number of edits that the listed edits are compared with in the
	 * statistics; by default the total of the period.
	 */
	protected int getTargetTotalEdits() {
		return totalEditsInPeriod;
	}

	/**
	 * Sorts the users and prints the table, then reports the statistics on standard error.
	 * <p>
	 * Users of the group "bot" are printed without a rank and do not take part in the ranking or
	 * the statistics. Users with equal target edits share a rank. Printing stops at the first
	 * user whose rank exceeds the limit.
	 * 
	 * @param users the users to print; the array is sorted in place
	 * @param userGroups the source of the group of every user
	 * @param limit the last rank to print
	 */
	public void print(User[] users, UserGroups userGroups, int limit) {
		try {
			printHeader();
			Arrays.sort(users, createComparator());
			writer.print("{| class=\"wikitable" + getSortable() + "\"");
			writer.println();
			writer.print("! " + getTableHeader());
			writer.println();
			int rank = 0;
			int prevCount = 0;
			int sameRank = 0;
			int totalEditsByListedUsers = 0;
			int numberOfListedEditors = 0;
			for (User user : users) {
				final String group = getGroup(user, userGroups.group(user.getId()));
				final String groupText = (group.equals("") ? "" : " (" + group + ")");
				final boolean ranked = !group.equals(UserGroups.BOT);
				if (ranked) {
					if (rank == 0) {
						rank++;
						sameRank = 1;
					} else if (getTargetEdits(user) < prevCount) {
						// Skip as many ranks as there were users sharing the previous one.
						rank += sameRank;
						sameRank = 1;
					} else {
						sameRank++;
					}
				}
				if (rank > limit) {
					break;
				}
				final String rankText;
				if (ranked) {
					rankText = Integer.toString(rank);
					// Counted only after the limit check, so that the statistics
					// cover exactly the users that are printed.
					numberOfListedEditors++;
					totalEditsByListedUsers += getTargetEdits(user);
					prevCount = getTargetEdits(user);
				} else {
					rankText = "";
				}
				writer.print("|-");
				writer.println();
				writer.print("| " + rankText);
				writer.print(" || ");
				processAnonymous(user);
				if (user.getId() == 0) {
					writer.print("[[" + getSpecialText() + ":Contributions/" + user.getText() + "|" + user.getText() + "]]");
				} else {
					writer.print("[[" + getUserText() + ":" + user.getText() + "|" + user.getText() + "]]");
				}
				writer.print(groupText);
				printEdits(user);
				writer.println();

			}
			writer.print("|}");
			writer.println();
			// No editor may be listed at all (no users, or only bots); avoid dividing by zero.
			final int averageEdits = numberOfListedEditors == 0
					? 0 : totalEditsByListedUsers / numberOfListedEditors;
			System.err.println("This list of " + limit + " editors represents " + totalEditsByListedUsers + " total edits,"
					+ " with an average of " + averageEdits + " per editor.");
			System.err.println("This accounts for "
					+ new DecimalFormat("#0.0").format(((float)totalEditsByListedUsers / (float)getTargetTotalEdits()) * 100) + "%"
					+ " of the " + getTargetTotalEdits() + " total edits made to the Wikipedia.");
		} finally {
			writer.flush();
			System.err.flush();
		}
	}
	
	/** Returns the edit count by which the user is ranked. */
	protected abstract int getTargetEdits(User user);
	
	/** Returns the header cells of the table, separated by "!!". */
	protected abstract String getTableHeader();
	
	/** Returns the namespace name used in the contributions link of users with the id 0. */
	protected abstract String getSpecialText();
	
	/** Returns the namespace name used in the link of users with a non-zero id. */
	protected abstract String getUserText();
	
	/** Creates the comparator that puts the users into printing order. */
	protected abstract Comparator<User> createComparator();
	
	/** Prints whatever precedes the table; nothing by default. */
	protected void printHeader() {
		return;
	}
	
	/** Prints the edit columns of the row of the user. */
	protected abstract void printEdits(User user);
	
	/** Hook called for every printed user just before its link is written; does nothing by default. */
	protected void processAnonymous(User user) {
		return;
	}
	
	/**
	 * Hook to adjust the group of a user.
	 * 
	 * @param user the user
	 * @param group the group found for the id of the user
	 * @return the group to use; by default the given one
	 */
	protected String getGroup(User user, String group) {
		return group;
	}
	
	/** Returns the text appended to the CSS class of the table; empty by default. */
	protected String getSortable() {
		return "";
	}

}

Instructions[edit]

The case of the lists of the English Wikipedia[edit]

   1. User:Mikkalai
   2. User:Haemo
   3. User:Jeffrey O. Gustafson
   .
   .
   .

or

User:Mikkalai 
User:Haemo 
User:Jeffrey O. Gustafson 
   .
   .
   .
   1. Bluebot
   2. AntiVandalBot
   3. MartinBot
   .
   .
   .

or

Bluebot 
AntiVandalBot 
MartinBot 
.
.
.

The case of en:Wikipedia:List of Wikipedians by number of edits[edit]

  • Run the Java program as follows.
java -Xmx1500m -Dbegin.date=2008-04-01 -Dend.date=2008-04-30 -Dlimit=4000 WikipediansByNumberOfEdits_en enwiki-20080501-stub-meta-history.xml.gz enwiki-20080501-user_groups.sql.gz > result.txt

The case of en:Wikipedia:List of Wikipedians by number of recent edits[edit]

  • Run the Java program as follows.
java -Xmx1500m -Dbegin.date=2008-04-01 -Dend.date=2008-04-30 -Dlimit=5000 WikipediansByNumberOfRecentEdits_en enwiki-20080501-stub-meta-history.xml.gz enwiki-20080501-user_groups.sql.gz > result.txt

Using awk[edit]

Alternatively, run the following awk command (Java is not needed):

mawk -v startdate=2005-01-01 -v enddate=2011-01-31 '{sub(/^[[:blank:]]+/,"")}/<timestamp>/{gsub(/<[^>]*>/,""); date=substr($0,1,10);next} /<username>/{gsub(/<[^>]*>/,""); totcount[$0]++; if ((date >= startdate) && (date <= enddate))periodcount[$0]++} END{for(u in periodcount)print "| | [[User:" u "]] || " periodcount[u]+0 " || " totcount[u] "\n|-"}' input

The case of ja:Wikipedia:編集回数の多いウィキペディアンの一覧[edit]

java -Xmx500m -Dbegin.date=2008-04-01 -Dend.date=2008-04-30 -Dlimit=200 WikipediansByNumberOfRecentEdits_ja jawiki-20080501-stub-meta-history.xml.gz jawiki-20080501-user_groups.sql.gz > result.txt

The case of zh:Wikipedia:最多贡献的用户[edit]

java -Xmx500m -Dbegin.date=2008-04-01 -Dend.date=2008-04-30 -Dlimit=500 WikipediansByNumberOfRecentEdits_zh zhwiki-20080501-stub-meta-history.xml.gz zhwiki-20080501-user_groups.sql.gz > result.txt