User:Demi/wikicontribs

From Wikipedia, the free encyclopedia

The following script can be used to summarize an editor's contributions to Wikipedia, for example mine as of 2005-10-18 (the small numbers in parentheses count the edits that included an edit summary):

Namespace Article Talk Total
Main 525 (522) 139 (134) 664 (656)
Wikipedia 281 (281) 80 (80) 361 (361)
User 137 (134) 68 (66) 205 (200)
Image 54 (54) 1 (1) 55 (55)
Template 24 (24) 11 (11) 35 (35)
Category 6 (6) 6 (6)
Total 1027 (1021) 299 (292) 1326 (1313)

Note that the script is slow because of the X(HT)ML parsing going on. Also, as written, the script will usually time out when requesting the URL from MediaWiki. The workaround is to download that URL with a tool such as wget and then run wikicontribs on the resulting file.

MediaWiki serves at most 5000 contributions per page, so you may need to download several contribution pages by hand (adjusting the URL each time) and run them all through this script (e.g. wikicontribs file1 file2 file3...).

 #!/usr/bin/ruby
 
 require 'rexml/document'
 require 'open-uri'
 require 'tempfile'
 require 'getoptlong'
 
 # Namespace prefixes that Contrib accepts in front of a page title.
 # The first three stand alone; every other namespace is listed together
 # with its " talk" companion, in that order.
 $NAMESPACES = %w[Media Special Talk] +
    %w[User Wikipedia Image MediaWiki Template Help Category].flat_map { |ns|
       [ns, "#{ns} talk"]
    }
 
 # One entry of a contributions listing, classified by namespace.
 #
 # The page title is read from the title attribute of the last <a> found
 # directly under the list item; the presence of a <span class="comment">
 # child marks the edit as having an edit summary.
 class Contrib
    # namespace   - 'Main', 'Main talk', one of $NAMESPACES, or 'Nil' when
    #               the item has no link or the link has no title.
    # title       - page title with a recognised namespace prefix removed
    #               ('nil' when no title could be read).
    # editsummary - true when the item carries an edit summary.
    attr_reader :namespace, :title, :editsummary
 
    # li - a list-item element answering get_elements(xpath) with an Array
    #      (an REXML element in the script).
    def initialize(li)
       link = li.get_elements('a[last()]').first
       # Explicit nil checks replace the former blanket rescue of
       # NoMethodError, which would also have hidden unrelated bugs.
       full_title = link && link.attributes['title']
       @namespace, @title = classify(full_title)
 
       # Does it have an edit summary?
       @editsummary = !li.get_elements("span[@class='comment']").empty?
    end
 
    private
 
    # Splits a full page title into [namespace, title].
    def classify(full_title)
       return ['Nil', 'nil'] if full_title.nil?
 
       m = full_title.match(/^([\s\w]+)\:(.*)$/)
       return ['Main', full_title] unless m
 
       prefix = m[1]
       if prefix == 'Talk'
          # Bare "Talk:" is the discussion side of the main namespace.
          ['Main talk', m[2]]
       elsif $NAMESPACES.include?(prefix)
          [prefix, m[2]]
       else
          # A colon inside an ordinary article title, not a namespace.
          ['Main', full_title]
       end
    end
 end
 
 # Settings used by the rest of the script. Each default below can be
 # replaced by the command-line switch of the same name; --user has no
 # default and only appears in $opt when given.
 $opt = {
    'limit'  => 5000,
    'url'    => 'http://en.wikipedia.org/w/index.php',
    'page'   => 'Special:Contributions',
    'output' => 'plain'
 }
 
 begin
    # Every switch takes a mandatory value.
    switches = %w[limit url page output user].map { |name|
       ["--#{name}", GetoptLong::REQUIRED_ARGUMENT]
    }
    GetoptLong.new(*switches).each do |flag, value|
       # Store under the switch name without its leading dashes.
       $opt[flag.sub(/^--/, '')] = value
    end
 rescue GetoptLong::InvalidOption
    # Unknown switch: carry on with whatever is in $opt at this point.
    true
 end
 
 # Per-namespace tallies: namespace => { 'count' => n, 'summaries' => n }.
 contributions = { }
 # Number of contributions seen overall, and how many had an edit summary.
 ct = 0
 summaries = 0
 
 # With --user, the contributions URL for that user is put at the front of
 # the inputs so it is scanned like any file named on the command line.
 if $opt.key? 'user'
    # The user name is form-encoded so that names containing spaces or
    # other reserved characters still produce a valid URL.
    ARGV.unshift($opt['url'] + '?title=' + $opt['page'] +
       '&target=' + URI.encode_www_form_component($opt['user']) +
       '&namespace=' + '&offset=0' +
       '&limit=' + $opt['limit'].to_s)
 end
 
 ARGV.each { |file|
    puts "Scanning %s" % file
    # Pick the opener explicitly: on current Ruby, Kernel#open does not
    # fetch URLs, and it would run a command for a name starting with "|".
    opener = (file =~ %r{\A(?:https?|ftp)://}i) ? URI : File
    opener.open(file) { |fh|
       fh = fh.open if fh.class == Tempfile
       begin
          doc = REXML::Document::new(fh)
       rescue REXML::ParseException => parserr
          puts parserr
          # Nothing was parsed, so move on to the next input.
          next
       end
       # Only the first list inside the page body is scanned.
       ul = doc.get_elements("//div[@id='bodyContent']//ul").first
       if ul.nil?
          warn "No contribution list found in %s" % file
          next
       end
       ul.each_element('li') { |li|
          ct += 1
          contrib = Contrib::new(li)
          tally = (contributions[contrib.namespace] ||=
             { 'count' => 0, 'summaries' => 0 })
          tally['count'] += 1
          next unless contrib.editsummary
          summaries += 1
          tally['summaries'] += 1
       }
    }
 }
 
 # Totals across every talk namespace that gets folded into its subject
 # namespace below.
 talkct = 0
 talksummaries = 0
 
 # Fold each "<namespace> talk" entry into the entry for "<namespace>".
 # The walk is over a snapshot of the keys, so removing the talk entry
 # while looping is safe.
 contributions.keys.each do |ns|
    talk_key = ns + ' talk'
    next unless contributions.key?(talk_key)
 
    talk_stats = contributions[talk_key]
    subject_stats = contributions[ns]
 
    subject_stats['talk'] = talk_stats['count']
    talkct += subject_stats['talk']
 
    subject_stats['talksummaries'] = talk_stats['summaries']
    talksummaries += subject_stats['talksummaries']
 
    contributions.delete(talk_key)
 end
 
 # Namespace rows ordered by their 'count' value, largest first.
 sortarr = contributions.sort { |a, b| b[1]['count'] <=> a[1]['count'] }
 
 # Column totals for the non-talk side, accumulated while printing rows.
 artct = 0
 artsummaries = 0
 
 # Any --output value other than 'plain' or 'wikitable' prints nothing.
 if $opt['output'] == 'plain'
    # One line per namespace: "count (with summary)" for the namespace,
    # then, where a talk side exists, the same for talk and for the sum.
    sortarr.each { |k, v|
       if v.key? 'talk'
          puts "%s: %i (%i) / %i (%i); %i (%i)" % [
             k, v['count'], v['summaries'],
             v['talk'], v['talksummaries'],
             v['count'] + v['talk'], v['summaries'] + v['talksummaries']
          ]
       else
          puts "%s: %i (%i)" % [k, v['count'], v['summaries']]
       end
       artct += v['count']
       artsummaries += v['summaries']
    }
    # The format needs a slot for each of the six values; the previous
    # four-slot string silently dropped the grand total.
    puts "Total: %i (%i) / %i (%i); %i (%i)" %
       [artct, artsummaries, talkct, talksummaries, ct, summaries]
 elsif $opt['output'] == 'wikitable'
    heading = '| style="text-align: center; font-weight: bold; background: #eee; border-bottom: 1px solid" | %s'
    puts '{| cellpadding="0" style="border-top: 2px solid; border-bottom: 2px solid; font-size: 90%"'
    %w[Namespace Article Talk Total].each { |label| puts heading % label }
 
    # %% is a literal percent sign in the rendered cell.
    cell = '| style="text-align: right" | %i <span style="font-size: 70%%">(%i)</span>'
 
    sortarr.each { |k, v|
       cells = [cell % [v['count'], v['summaries']]]
       if v.key? 'talk'
          cells << (cell % [v['talk'], v['talksummaries']])
          cells << (cell % [v['count'] + v['talk'],
                            v['summaries'] + v['talksummaries']])
       else
          # No talk edits: empty Talk cell, and the total equals the
          # namespace's own figures.
          cells << '|'
          cells << (cell % [v['count'], v['summaries']])
       end
       # Both kinds of row now end with the same separator; a missing comma
       # used to leave it out for rows without talk edits.
       puts '|-', "|#{k}", *cells, '|-'
       artct += v['count']
       artsummaries += v['summaries']
    }
 
    puts "|-",
       "|Total",
       cell % [artct, artsummaries],
       cell % [talkct, talksummaries],
       cell % [ct, summaries],
       "|}"
 
 end