-- User talk:Wfaxon/padfixer
-- Appearance
--[[
padfixer.lua -- read "Poker After Dark" wikipedia source, produce summary tables
2009-06-22 WF Note no '#!' line since not on usual box
This program allows a quick update of the Wikipedia "Poker After Dark" (PAD) "Stats"
section after additional information about a tournament has been added to the page.
To update the "Stats" section:
1) Have Lua available; see lua.org. If you have a C or C++ compiler you can download
and install Lua on your computer in less than five minutes.
2) Copy/paste this page (or the source for it) into a text file named "padfixer.lua"
on your computer.
3) Edit the full page for "Poker After Dark" and copy/paste its full text into a text
file named "padinput" on your computer. Be sure to use the "Back" button to avoid
changing the PAD page.
4) Note the number of "seasons" of PAD that have been added to the page. At this
writing the number is 5. This number will be designated "nSeasons", below.
Likewise note the number of the last week of play, designated "nWeek" (this/these
should be fixed).
5) From a command line, run:
lua padfixer.lua nSeasons nWeek <padinput >padoutput
This reads the source of the PAD page from "padinput" and produces a new "Stats"
section in "padoutput".
6) It is probable that one or more errors/inconsistencies in player names will be
discovered by the program. The program requires that all occurrences of a player's
name in the tables be identical. Fix the "padinput" file accordingly. You might
also want to fix the tables on the PAD page itself to limit this problem in the
future.
7) When the program has run correctly, edit the PAD "Stats" section, delete all its
content, then cut/paste the "padoutput" file into it, completely replacing it.
8) "Show preview" the changes and check them before saving them.
Potential problems:
- The PAD source has some unicode characters so you have to select UTF-8 encoding
in order to save it. This does not affect the "Stats" tables now but may if we
get, for example, a player with an umlaut in his/her name. Plain Lua is rather
dumb about this. Maybe someone will be moved to translate this program into,
say, Python?
- The complete "Stats" section is reproduced. When accepted edits are made to it,
even if the table contents remain the same, the "forms" in the below source code
will have to be changed.
I plan to additionally sort on the other finishes columns, sometime or other.
--]]
-------------------------------------------------------------------------------------
-- "Stats" forms sans table contents ([[ and ]] for multi-line strings)
-- form1: section heading and introductory paragraph. It is a string.format
-- template: the two %d slots are filled with the season and week numbers by
-- padfixer(), so any literal percent sign added here would have to be doubled.
local form1 = [[
==Stats==
These statistics are meant to help readers judge the players' relative performances in the main type of event featured on "Poker After Dark": The six-player winner-take-all tournament. So, with the exception of the "Total appearances" column, the statistics omit all "Cash Games" (which do not even have declared winners) and the one "Heads Up" event played so far (Season 4, Week 3, won by Phil Hellmuth). The notations (x2) and (x3) mean the player achieved that placement two or three times, respectively.
:''Stats updated to reflect results through Season %d, Week %d.''
]]
-----
-- form2: opens the two-column layout and the "Most wins" table (header row only).
-- Written verbatim with io.write, so the literal '%' in "% Won" is safe here.
local form2 = [[
{{col-begin}}
{{col-2}}
===Most wins===
The following players have won more than once. (Aired episodes of regular-format tournaments only.)
{| class="wikitable"
|-
! Player !! Wins !! Appearances !! % Won !! Other finishes
]]
-----
-- form3: closes the "Most wins" table and opens the "Never won" table.
local form3 = [[
|-
|}
===Never won===
The following players have appeared more than twice, but have yet to win. (Aired episodes of regular-format tournaments only.)
{| class="wikitable"
|-
! Player !! Appearances !! Highest finish
]]
-----
-- form4: closes the "Never won" table, starts the second column and opens the
-- "Most appearances" table.
local form4 = [[
|-
|}
{{col-2}}
===Most appearances===
The following players have been invited to play on ''Poker After Dark'' more than twice. ("Total appearances" column includes all filmed events; "Aired" omits non-regular format tournaments as well as yet-to-air episodes.)
{| class="wikitable"
|-
! Player !! Total<br />appearances !! Aired !! Highest<br />aired finish
]]
-----
-- form5: closes the "Most appearances" table and the two-column layout.
local form5 = [[
|-
|}
{{col-end}}
]]
-------------------------------------------------------------------------------------
-- input parsing
-- Strip double-quoted substrings (such as nicknames) from a string.
-- Only the plain '"' character is treated as a quote.
--   x: the string to clean
-- Returns x with every "..." run replaced by a single space.
-- Raises an error if a '"' is left over, i.e. the quotes were not balanced.
function removeQuotes(x)
  -- '.-' is the shortest match, so each quoted run is removed on its own
  local stripped = string.gsub(x, '".-"', ' ')
  if not string.find(stripped, '"') then
    return stripped
  end
  error('unmatched quote in string |'..x..'|')
end
-- Normalize one table cell into a bare player name.
-- Steps, in order: drop quoted nicknames (via removeQuotes), drop wiki-link
-- brackets, cut everything from the first '(' onward (parenthetical
-- disambiguation), trim the ends, and squeeze runs of whitespace to one space.
--   name: raw cell text
-- Returns the cleaned name, or nil when nothing is left or the cell contains
-- an '=' (such cells are not treated as names).
function cleanName(name)
  local cleaned = removeQuotes(name)
  cleaned = string.gsub(cleaned, '[%[%]]', '')
  local paren = string.find(cleaned, '%(')
  if paren then
    cleaned = string.sub(cleaned, 1, paren - 1)
  end
  cleaned = string.gsub(cleaned, '^%s*(.-)%s*$', '%1')
  cleaned = string.gsub(cleaned, '%s+', ' ')
  if cleaned == '' then
    return nil
  end
  if string.find(cleaned, '=') then
    return nil
  end
  return cleaned
end
-- Trailing words that are generational suffixes rather than surnames.
local nameSuffixes = {
  ['Jr'] = true, ['Jr.'] = true,
  ['II'] = true, ['2nd'] = true,
  ['III'] = true, ['3rd'] = true,
  ['IV'] = true, ['4th'] = true,
}

-- Build a "Last[ Suffix], First[ Middle...]" sort key from a display name.
-- Primitive, but handles a trailing Jr/II/III/IV style suffix and a "de"/"De"
-- particle that starts the surname (e.g. "Roland de Wolfe" -> "De Wolfe, Roland").
--   name: cleaned player name, words separated by whitespace
-- Returns the sort key string; a one-word name yields "Word,".
-- Raises an error if name contains no words.
function makeLastFirst(name)
  -- string.gfind is the Lua 5.0 spelling; it was renamed string.gmatch in 5.1
  -- and removed in 5.2, so prefer gmatch and fall back only on old interpreters.
  local gmatch = string.gmatch or string.gfind
  local t, n = {}, 0
  for w in gmatch(name, '(%S+)') do
    n = n + 1
    t[n] = w
  end
  if n == 0 then
    error('makeLastFirst: empty name')
  end

  -- index of the word where the surname starts
  local last = n
  if last > 1 and nameSuffixes[t[n]] then
    last = last - 1
  end
  -- scan right-to-left for a particle; never treat the first word as one
  for i = last - 1, 2, -1 do
    if t[i] == 'de' or t[i] == 'De' then
      last = i
      t[i] = 'De' -- sort 'de' names correctly
      break
    end
  end

  local surname = table.concat(t, ' ', last, n)
  if last == 1 then
    return surname..','
  end
  return surname..', '..table.concat(t, ' ', 1, last - 1)
end
-- Read the page source from standard input and collect per-player counts.
-- The first nSeasons tables whose opening line contains 'wikitable' are read
-- as Episode Guide tables, the next nSeasons as Results tables; reading stops
-- at the table after that (or at end of input).
--   Episode Guide row: | 1 || 1-6 || date || title || [[player1]] || p2 || ... || [[p6]]
--     (the first four cells are skipped; every later cell is a player)
--   Results row:       | 5 || title ||player1 ||p2 ||p3 ||p4 ||p5 || p6
--     (the first two cells are skipped; player column i is counted in slot 7-i,
--      so the first player column feeds slot 6 and the sixth feeds slot 1)
-- Rows containing 'N/A' or 'Applicable' are ignored.
--   nSeasons: number of seasons, i.e. tables of each kind, to read
-- Returns a table keyed by cleaned player name; each value is
--   {c1,...,c6, name=, lastFirst=, appears=, aired=} where c1..c6 are the
--   Results slot counts, appears counts Episode Guide cells and aired counts
--   Results cells.
-- Raises an error on EOF inside a table, on fewer than 2*nSeasons tables, on a
-- Results player missing from every Episode Guide, or on a Results row with
-- more than six player columns.
function parse(nSeasons)
  -- string.gfind is the Lua 5.0 spelling; renamed gmatch in 5.1, removed in 5.2
  local gmatch = string.gmatch or string.gfind
  local players, nFound = {}, 0
  while true do
    local line = io.read() -- not io.lines() since rows are also read below
    if not line then break end
    if string.find(line, 'wikitable') then
      nFound = nFound + 1
      if nFound > 2*nSeasons then break end
      local isGuide = nFound <= nSeasons
      while true do
        local row = io.read()
        assert(row, 'EOF inside wikitable')
        if string.find(row, '|}') then break end
        if not string.find(row, 'N/A') and not string.find(row, 'Applicable') then
          row = row..'|' -- so the final cell is also closed by a '|'
          -- start negative so that the first player cell becomes field 1
          local nField = isGuide and -4 or -2
          for cell in gmatch(row, '|([^|]+)|') do
            nField = nField + 1
            if nField > 0 then
              local name = cleanName(cell)
              if name then
                local p = players[name]
                if isGuide then
                  if p then
                    p.appears = p.appears + 1
                  else
                    players[name] = {0,0,0,0,0,0, name=name,
                                     lastFirst=makeLastFirst(name),
                                     appears=1, aired=0}
                  end
                else
                  if not p then
                    error('Player "'..name..
                          '" is in a Results table but not in an Episode Guide table')
                  end
                  if nField > 6 then
                    -- previously this surfaced as "arithmetic on a nil value"
                    error('Results row has more than 6 player columns: |'..row)
                  end
                  p[7-nField] = p[7-nField] + 1
                  p.aired = p.aired + 1
                end
              end
            end
          end
        end
      end
    end
  end
  assert(nFound >= 2*nSeasons, 'Too few wikitables')
  return players
end
-------------------------------------------------------------------------------------
-- table generation
-- English ordinals for finishing positions 1..6
local pos = {'1st', '2nd', '3rd', '4th', '5th', '6th'}

-- Describe how often a player finished in position n.
--   t: player record whose slots 1..6 hold per-position counts
--   n: position (1..6)
-- Returns nil for a zero count, the bare ordinal for a count of one, and
-- "<ordinal> (x<count>)" otherwise.
function posEng(t, n)
  local times = t[n]
  if times == 0 then
    return nil
  elseif times == 1 then
    return pos[n]
  end
  return string.format('%s (x%d)', pos[n], times)
end
-- Print the "Most wins" table rows: players with two or more wins, ordered by
-- wins (descending), then winning percentage (descending), then sort name.
--   t: array of player records; it is sorted in place and every record gains
--      a 'percent' field (wins as a percentage of aired appearances)
-- Each row is printed as
--   |-\n| name || wins || aired || pct% || <other finishes, comma separated>
function mostWins(t)
  for _, x in pairs(t) do
    -- Records start with aired=0, so guard the division: 0/0 would store NaN
    -- and feed it to the comparator below.
    if x.aired > 0 then
      x.percent = 100*x[1]/x.aired
    else
      x.percent = 0
    end
  end
  table.sort(t,
    function(a, b)
      if a[1] ~= b[1] then return a[1] > b[1] end                     -- descending on wins
      if a.percent ~= b.percent then return a.percent > b.percent end -- descending on percentage
      return a.lastFirst < b.lastFirst                                -- ascending on name
    end)
  for _, x in ipairs(t) do
    if x[1] < 2 then break end -- sorted by wins, so nobody later qualifies
    local finishes, n = {}, 0
    for i = 2, 6 do
      if x[i] > 0 then
        n = n + 1
        finishes[n] = ' '..posEng(x, i)
      end
    end
    print(string.format('|-\n| %s || %d || %d || %.0f%% || ',
                        x.name, x[1], x.aired, x.percent)
          ..table.concat(finishes, ','))
  end
end
-- Print the "Never won" table rows: players with no wins and at least three
-- aired appearances, most aired appearances first, ties broken by sort name.
--   t: array of player records; it is sorted in place
-- Each row is printed as  |-\n| name || aired || <best finish among 2nd..6th>
function neverWon(t)
  local function winlessFirst(a, b)
    if a[1] ~= b[1] then return a[1] < b[1] end             -- fewest wins first
    if a.aired ~= b.aired then return a.aired > b.aired end -- then most aired
    return a.lastFirst < b.lastFirst                        -- then by name
  end
  table.sort(t, winlessFirst)

  for _, player in ipairs(t) do
    -- the sort puts all qualifying players at the front, so stop at the first miss
    if player[1] > 0 or player.aired < 3 then break end
    local best = ''
    for place = 2, 6 do
      if player[place] > 0 then
        best = posEng(player, place)
        break
      end
    end
    print(string.format('|-\n| %s || %d || ', player.name, player.aired)..best)
  end
end
-- Print the "Most appearances" table rows: players with three or more total
-- appearances, ordered by total appearances, then aired appearances (both
-- descending), then sort name.
--   t: array of player records; it is sorted in place
-- Each row is printed as  |-\n| name || appears || aired || <best finish>
-- The last cell stays empty when the player has no recorded finish.
function mostAppearances(t)
  local function busiestFirst(a, b)
    if a.appears ~= b.appears then return a.appears > b.appears end -- most appearances
    if a.aired ~= b.aired then return a.aired > b.aired end         -- then most aired
    return a.lastFirst < b.lastFirst                                -- then by name
  end
  table.sort(t, busiestFirst)

  for _, player in ipairs(t) do
    if player.appears < 3 then break end -- sorted, so nobody later qualifies
    local best = ''
    for place = 1, 6 do
      if player[place] > 0 then
        best = posEng(player, place)
        break
      end
    end
    print(string.format('|-\n| %s || %d || %d || ',
                        player.name, player.appears, player.aired)..best)
  end
end
-------------------------------------------------------------------------------------
-- main pgm
-- Debugging aid: list every player record on standard error, ordered by sort
-- name so that near-duplicate spellings end up next to each other.
--   t: array of player records; it is sorted in place
function dump(t)
  -- sort names shorter than 14 characters get an extra tab to keep columns aligned
  local shortFmt = ' %s\t\tname="%s", appears=%d, aired=%d, results=%d,%d,%d,%d,%d,%d\n'
  local longFmt = ' %s\tname="%s", appears=%d, aired=%d, results=%d,%d,%d,%d,%d,%d\n'

  table.sort(t, function(a, b) return a.lastFirst < b.lastFirst end)

  local err = io.stderr
  err:write('dumping...\n')
  for _, x in ipairs(t) do
    local fmt = longFmt
    if string.len(x.lastFirst) < 14 then
      fmt = shortFmt
    end
    err:write(string.format(fmt, x.lastFirst, x.name, x.appears, x.aired,
                            x[1], x[2], x[3], x[4], x[5], x[6]))
  end
  err:write('...end dump\n')
end
-- Collect the values of a table into a new array (keys are discarded).
--   t: any table
-- Returns a fresh array holding every value of t, in pairs() traversal order.
function toarray(t)
  local values = {}
  for _, value in pairs(t) do
    table.insert(values, value)
  end
  return values
end
-- Shown when the arguments are wrong; both numbers are required.
local usage = 'usage: lua padfixer.lua nSeasons nWeek <padinput >padoutput'

-- Main routine: parse the page source from standard input and write the
-- regenerated "Stats" section to standard output.
--   arg: array of exactly two strings, the number of seasons on the page and
--        the last week of play; both must convert to positive numbers
-- Raises the usage message if the arguments are missing, extra or invalid;
-- errors from parsing propagate unchanged.
function padfixer(arg)
  assert(arg[1] and arg[2] and not arg[3], usage)
  local nSeasons, nWeek = tonumber(arg[1]), tonumber(arg[2])
  assert(nSeasons and nSeasons > 0 and nWeek and nWeek > 0, usage)
  local t = toarray(parse(nSeasons)) -- array for sorting
  --dump(t)
  -- each table function re-sorts t in place before printing its rows
  io.write(string.format(form1, nSeasons, nWeek))
  io.write(form2)
  mostWins(t)
  io.write(form3)
  neverWon(t)
  io.write(form4)
  mostAppearances(t)
  io.write(form5)
end
-------------------------------------------------------------------------------------
-- Script entry point. 'arg' is the global table in which the standalone lua
-- interpreter stores the command-line arguments (arg[1] = nSeasons, arg[2] = nWeek).
padfixer(arg) -- command-line arguments
-- padfixer.lua end