User:GreenC bot/Job 18/source
Appearance
#!/usr/local/bin/gawk -bE # # vebug - https://en.wikipedia.org/wiki/User:GreenC_bot/Job_18 # # The MIT License (MIT) # # Copyright (c) August 2019 User:GreenC (en.wikipedia.org) # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. BEGIN { BotName = "vebug" } @include "botwiki.awk" @include "library.awk" @include "json.awk" BEGIN { Mode = "bot" # set to "find" and it will search only and exit with a 1 (found something) or 0 (found nothing) # in "find" mode, run via 'project -s' to search local cache for articles containing actionable matches # set to anything else and it will process the article. IGNORECASE = 1 ReSpace = "[\n\r\t]*[ ]*[\n\r\t]*[ ]*[\n\r\t]*" ReSups = "[<]sup[^>]*[>][^<]+[<][/]sup[>]" delete citeTable Optind = Opterr = 1 while ((C = getopt(ARGC, ARGV, "hs:l:n:")) != -1) { opts++ if(C == "s") # -s <file> article.txt source to process. articlename = verifyval(Optarg) if(C == "l") # -l <dir/> Directory where logging is sent.. end with "/" logdir = verifyval(Optarg) if(C == "n") # -n <name> Wikipedia name of article wikiname = verifyval(Optarg) if(C == "h") { usage() exit } } if( ! opts || articlename == "" ) { stdErr("Error in vebug.awk (1)") print "0" exit } if(wikiname == "" || logdir == "") Logfile = "/dev/null" else { if(substr(logdir, length(logdir), 1) != "/") logdir = logdir "/" Logfile = logdir "logvebug" } # Path to data directory ends in "/" DataDir = articlename gsub(regesc3(basename(articlename)) "$", "", DataDir) # Number of changes made to article Count = 0 main() } # # Run the program, save the page to disk # function main( article,articlenew,articlenewname,editsummaryname,bn,plural) { checkexists(articlename, "vebug.awk main()", "exit") article = readfile(articlename) if(length(article) < 10) { print "0" exit } article = deflate(article) articlenew = vebug(article) if(article != articlenew && length(articlenew) > 10 && Count > 0) { articlenew = inflate(articlenew) articlenewname = DataDir "article.vebug.txt" printf("%s", articlenew) > articlenewname close(articlenewname) editsummaryname = DataDir "editsummary.vebug.txt" if(Count > 1) plural = "s" printf("Restore %s cite" plural " deleted by a bug in VisualEditor ([[User:GreenC_bot/Job_18|vebug bot]])", Count) > editsummaryname # Customize the edit summary to be more specific close(editsummaryname) print Count exit } print "0" exit } # # vebug - parse page, load data into citeTable[][], modify page, return it to main() # function vebug(article, c,field,sep,i,field2,sep2,j,d,k,dest,dest2,fieldi,field2txt,setdone,key,vertemp,re,foundit) { # Special case # Regularize <sup>[[User:Claudia Diaz2/sandbox#cite%20note-5|[6]]<nowiki>]</nowiki></sup> c = patsplit(article, field, /[|][[][^]]*[]]{2}[<]nowiki[>][]][<][/]nowiki[>]/, sep) for(k = 1; k <= c; k++) { if(match(reverse(sep[k-1]), /[^[]+[[]{2}/, dest) ) { origcite = field[k] gsub(/<\/?nowiki>/, "", field[k]) field[k] = reverse(dest[0]) field[k] origTable[gsubi("^[<]nowiki[^>]*[>]|[<][/]nowiki[>]", "", field[k])] = reverse(dest[0]) origcite sep[k-1] = gsubs(reverse(dest[0]), "", sep[k-1]) } } article = unpatsplit(field, sep) # Special case # Regularize [[Politics of Venezuela#cite%20note-19|<sup>[1</sup>]] c = patsplit(article, field, /[|][<]sup[>][[][0-9]*[<][/]sup[>][]]{2}/, sep) for(k = 1; k <= c; k++) { if(match(reverse(sep[k-1]), /[^[]+[[]{2}/, dest) ) { origcite = field[k] sub(/[<]sup[>][[][^<]*[<][/]sup[>]/, "[]", field[k]) field[k] = reverse(dest[0]) field[k] origTable[gsubi("^[<]nowiki[^>]*[>]|[<][/]nowiki[>]", "", field[k])] = reverse(dest[0]) origcite sep[k-1] = gsubs(reverse(dest[0]), "", sep[k-1]) } } article = unpatsplit(field, sep) # Special case # Regularize <sup>[[User:Claudia Diaz2/sandbox#cite%20note-5|<nowiki>6]</nowiki>]]</sup> c = patsplit(article, field, /[|][<]nowiki[>][^]]*[]]{1}[<][/]nowiki[>][]]{2}/, sep) for(k = 1; k <= c; k++) { if(match(reverse(sep[k-1]), /[^[]+[[]{2}/, dest) ) { origcite = field[k] sub(/<nowiki>[^]]*[^]]/, "[", field[k]) sub(/<\/nowiki>/, "", field[k]) field[k] = reverse(dest[0]) field[k] origTable[gsubi("^[<]nowiki[^>]*[>]|[<][/]nowiki[>]", "", field[k])] = reverse(dest[0]) origcite sep[k-1] = gsubs(reverse(dest[0]), "", sep[k-1]) } } article = unpatsplit(field, sep) # Special case # Regularize [[2017 Women's March#cite%20note-FT%20100%2C000-13|<span>[13]</span>]] # works for <sup> or <span> split("sup span", tag, " ") for(k = 1; k <= 2; k++) { re = "[<]" tag[k] "[^>]*[>][[][0-9]+[]][<][/]" tag[k] "[>][]]{2}" c = patsplit(article, field, re, sep) for(i = 1; i <= c; i++) { if(match(reverse(sep[i-1]), /[|][^[]+[[]{2}/, dest) ) { origcite = field[i] re = "^[<]" tag[k] "[^>]*[>]|[<][/]" tag[k] "[>]" gsub(re, "", field[i]) field[i] = "<sup>" reverse(dest[0]) field[i] "</sup>" origTable[gsubi("^[<]" tag[k] "[^>]*[>]|[<][/]" tag[k] "[>]","",field[i])] = reverse(dest[0]) origcite sep[i-1] = gsubs(reverse(dest[0]), "", sep[i-1]) } } article = unpatsplit(field,sep) } # Special case # Convert cases not surrounded by <sup> or <span> d = patsplit(article, field2, ReSups, sep2) # Everything already surrounded by sup c = patsplit(reverse(article), field, /[]]{3}[0-9]{0,3}[[][|][0-9]{1,3}[-][^[]+[[]{2}/, sep) for(i = 1; i <= c; i++) { foundit = 0 for(j = 1; j <= d; j++) { if(field2[j] ~ regesc3(reverse(field[i]))) { foundit = 1 } } if( ! foundit) { origTable["<sup>" reverse(field[i]) "</sup>"] = reverse(field[i]) field[i] = ">pus/<" field[i] ">pus<" } } article = reverse(unpatsplit(field, sep)) # Standard: convert cites surrounded by <sup>..</sup> c = patsplit(article, field, ReSups, sep) for(i = 1; i <= c; i++) { if(field[i] !~ /[{][{][^{]*[{][{]/ && field[i] ~ /cite[%]20/) { # skip embeded templates not found by deflate() sendlog(logdir "logsups", wikiname, field[i]) # Encode embedded [0-9] so it can be parsed # <sup>[[Group of Eight#cite%20note-19|[19]]][[Group of Eight#cite%20note-20|[20]]]</sup> --> # <sup>[[Group of Eight#cite%20note-19|VEBUGO19VEBUGC]][[Group of Eight#cite%20note-20|VEBUGO20VEBUGC]]</sup> fieldi = field[i] while(match(fieldi, "[[][0-9]+[]]", dest)) { if(match(dest[0], /[0-9]+/, dest2)) field[i] = gsubs(dest[0], "VEBUGO" dest2[0] "VEBUGC", field[i]) sub("[[][0-9]+[]]", "", fieldi) } # Populate citeTable[][] delete citeTable d = patsplit(field[i], field2, /[[]{2}[^]]+[]]{1,3}/, sep2) for(j = 1; j <= d; j++) { # Decoded field2txt = field2[j] field2txt = gsubs("VEBUGO", "[", field2txt) field2txt = gsubs("VEBUGC", "]", field2txt) citeTable[field2txt]["decoded"] = field2txt key = field2txt # Encoded citeTable[key]["encoded"] = field2[j] if( empty(origTable[key])) origTable[key] = key # Cite number getCiteNumbers(key) if(abort(key, "citenumber")) continue # Primary article getTitle(key, "artprimary") if(abort(key, "artprimary")) continue # Secondary article title eg. Group of Eight getTitle(key, "artsecondary") if(abort(key, "artsecondary")) continue # Time/revision it was last added to primary article getPrimaryRevTime(key) if(abort(key, "artprimaryrevid")) continue if(abort(key, "artprimarytimestamp")) continue # Time/revision it existed in secondary article getSecondaryRevTime(key) if(abort(key, "artsecondaryrevid")) continue if(abort(key, "artsecondarytimestamp")) continue if(abort(key, "artsecondarywikitext")) continue setdone = 0 # if 1, ref is established early in process due to missing data settype = 0 if(! empty(citeTable[key]["citenumbermismatch"])) { split(citeTable[key]["citenumbermismatch"], cites, /[|]/) vertemp = "{{verify source |date=" getDateToday() " |reason=This ref was deleted Special:Diff/" citeTable[key]["artprimaryrevid"] " by a bug in VisualEditor and later identified by a bot. The original cite can be found at Special:Permalink/" citeTable[key]["artsecondaryrevid"] " (or in a rev close to it) in either cite #" cites[1] " or cite #" cites[2] " - find and verify the cite and replace this template with it (1). [[User:GreenC_bot/Job_18]]}}" field2[j] = "<ref>Citation error. See inline comment how to fix. " vertemp "</ref>" settype = 1 setdone = 1 } # Get cite from secondary by its number if( ! setdone) { getSecondaryCiteByNumer(key) if( empty(citeTable[key]["citesecondaryplaintext"]) ) { vertemp = "{{verify source |date=" getDateToday() " |reason=This ref was deleted Special:Diff/" citeTable[key]["artprimaryrevid"] " by a bug in VisualEditor and later identified by a bot. The original cite can be found at Special:Permalink/" citeTable[key]["artsecondaryrevid"] " (or in a rev close to it) as cite #" citeTable[key]["citenumber"] " - find and verify the cite and replace this template with it (2). [[User:GreenC_bot/Job_18]]}}" field2[j] = "<ref>Citation error. See inline comment how to fix. " vertemp "</ref>" settype = 2 setdone = 1 } } # Upload page to wikipedia if( ! setdone) { if( generateCitationsPage(key) == 0) { vertemp = "{{verify source |date=" getDateToday() " |reason=This ref was deleted Special:Diff/" citeTable[key]["artprimaryrevid"] " by a bug in VisualEditor and later restored by a bot in plain-text form. The original cite can be found at Special:Permalink/" citeTable[key]["artsecondaryrevid"] " (or in a rev close to it) as cite #" citeTable[key]["citenumber"] " - find and verify the cite and replace this template with it (3). [[User:GreenC_bot/Job_18]]}}" field2[j] = "<ref>" citeTable[key]["citesecondaryplaintext"] " " vertemp "</ref>" settype = 3 setdone = 1 } } # Parse page if( ! setdone) { parseCitationsPage(key) if( empty(citeTable[key]["citesecondarywikicite"]) ) { vertemp = "{{verify source |date=" getDateToday() " |reason=This ref was deleted Special:Diff/" citeTable[key]["artprimaryrevid"] " by a bug in VisualEditor and later restored by a bot in plain-text form. The original cite can be found at Special:Permalink/" citeTable[key]["artsecondaryrevid"] " (or in a rev close it it) as cite #" citeTable[key]["citenumber"] " - find and verify the cite and replace this template with it (4). [[User:GreenC_bot/Job_18]]}}" field2[j] = "<ref>" citeTable[key]["citesecondaryplaintext"] " " vertemp "</ref>" settype = 4 setdone = 1 } } if( ! setdone) { vertemp = "{{verify source |date=" getDateToday() " |reason=This ref was deleted Special:Diff/" citeTable[key]["artprimaryrevid"] " by a bug in VisualEditor and later restored by a bot from the original cite located at Special:Permalink/" citeTable[key]["artsecondaryrevid"] " cite #" citeTable[key]["citenumber"] " - verify the cite is accurate and delete this template. [[User:GreenC_bot/Job_18]]}}" field2[j] = "<ref>" citeTable[key]["citesecondarywikicite"] " " vertemp "</ref>" sendlog(logdir "logconvert", wikiname, key " ---- " citeTable[key]["citesecondarywikicite"] ) } else { sendlog(logdir "logconvert", wikiname, key " ---- " settype ) } Count++ } field[i] = unpatsplit(field2,sep2) gsub(/^[<]sup[^>]*[>]|[<][/]sup[>]$/, "", field[i]) # Save debuggung info to table.out in data directory saveTable() } } article = unpatsplit(field, sep) # Check for missed sups if(article ~ /[#]cite[%]20/ || article ~ /VEBUG[OC]/ ) { sendlog(logdir "logmissed", wikiname, "Contains cite%20 or VEBUGO . aborting") print article > DataDir "article.abort.txt" Count = 0 # Abort changes to article } return article } # # Get citations numbers # # Populates: # citeTable[key]["citenumber"] (a single cite number best guess, usually the second number) # citeTable[key]["citenumbermismatch"] (two cites numbers sep by "|" if different, otherwise blank) # function getCiteNumbers(key, buf1,buf2,dest) { citeTable[key]["citenumber"] = "" citeTable[key]["citenumbermismatch"] = "" # -55|[note 1]]$ (get "note 1") if(match(reverse(citeTable[key]["decoded"]), /^[]]{2}[^[]*[[][|]/, dest)) { gsub(/^[]]{2,3}|[[][|]$/, "", dest[0]) buf1 = reverse(dest[0]) if(! empty(strip(buf1))) citeTable[key]["citenumber"] = buf1 } # -55|[note 1]]$ (get "55") if(match(reverse(citeTable[key]["decoded"]), /[|][^-]*[^-]/, dest)) { gsub(/^[|]/, "", dest[0]) if(! empty(strip(dest[0]))) buf2 = reverse(dest[0]) } if(! empty(citeTable[key]["citenumber"]) && ! empty(buf2)) { if(buf2 != citeTable[key]["citenumber"]) citeTable[key]["citenumbermismatch"] = buf2 "|" citeTable[key]["citenumber"] } if( ! isanumber(citeTable[key]["citenumber"]) && isanumber(buf2)) citeTable[key]["citenumber"] = buf2 } # # Parse citations page User:GreenC/testcases/iabdebug # # Populate citeTable[key]["citesecondarywikicite"] # function parseCitationsPage(key, begin,i,a,b,fp,np,c,d,e) { citeTable[key]["citesecondarywikicite"] = "" # Convert page to plain-text command = "w3m -dump -cols 10000 'https://en.wikipedia.org/wiki/User:GreenC/testcases/iabdebug'" fp = sys2var(command) # Extract core surrounded by "%%%%" and "^^^^" begin = 0 for(i = 1; i <= splitn(fp, a, i); i++) { if(a[i] ~ /^[%]{4}$/) { begin = 1 continue } if(a[i] ~ /^[\\^]{4}$/) begin = 0 if(begin) np = np "\n" a[i] } # Extract records surrounded by "@@@@" c = split(np, b, /[@]{4}\n/) for(i = 1; i <= c; i++) { if(! empty(strip(b[i]))) { d = split(strip(b[i]), e, /[+]{4}\n/) e[1] = gsubs("^[dead link]", "", e[1]) # print strip(e[1]) " = " citeTable[key]["citesecondaryplaintext"] if(strip(e[1]) == citeTable[key]["citesecondaryplaintext"]) { citeTable[key]["citesecondarywikicite"] = strip(e[2]) break } } } } # # Generate a list of citations in parsable format and upload to User:GreenC/testcases/iabdebug # function generateCitationsPage(key, c,b,i,k,bp,np,status) { bp = "\n<p>\n" np = setdatetype(citeTable[key]["artsecondarywikitext"]) bp # need to set date type so CS1|2 date display is consistent np = np "%%%%" bp c = split(citeTable[key]["artsecondarywikitext"], b, "<ref[^>]*>") for(i = 1; i <= c; i++) { k = strip(substr(b[i], 1, match(b[i], "</ref>") - 1)) if( ! empty(k)) { np = np "@@@@" bp np = np k bp np = np "++++" bp np = np "<nowiki>" k "</nowiki>" bp } } np = np "^^^^" bp # Upload page for(i = 1; i <= 3; i++) { status = sys2varPipe(np, "wikiget -E " shquote("User:GreenC/testcases/iabdebug") " -S " shquote(citeTable[key]["artprimary"] " -/- " citeTable[key]["artsecondary"]) " -P STDIN") if(status ~ "Success" || status ~ "No change") { sleep(5) return 1 } else sleep(5) } sendlog(logdir "syslog", wikiname, "Error: Unable to upload to User page, wikiget returns: " status) return 0 } # # Get plain-text version of given cite number in secondary article # populates citeTable[key]["citesecondaryplaintext"] # on error set to blank # function getSecondaryCiteByNumer(key, command,fp,citenum,a,i) { citeTable[key]["citesecondaryplaintext"] = "" # Plain text of secondary article command = "w3m -dump -cols 10000 " shquote("https://en.wikipedia.org/w/index.php?title=" urlencodeawk(citeTable[key]["artsecondary"]) "&oldid=" citeTable[key]["artsecondaryrevid"] ) fp = sys2var(command) # Get the cite # in plain-text if( int(citeTable[key]["citenumber"]) < 10) citenum = "^[ ]" citeTable[key]["citenumber"] "[.][ ][\\^]" else citenum = "^" citeTable[key]["citenumber"] "[.][ ][\\^]" for(i = 1; i <= splitn(fp, a, i); i++) { if(a[i] ~ citenum) { sub(citenum, "", a[i]) gsub(/[\\^][a-z]{1,3}[ ]/, "", a[i]) a[i] = gsubs("^[dead link]", "", a[i]) citeTable[key]["citesecondaryplaintext"] = strip(a[i]) # break # keep going to get past any duplicates in 'notes' section .. this is imperfect though } } } # # What time and revision was the bug added to the primary article? # populates: # citeTable[key]["artsecondaryrevid"] # citeTable[key]["artsecondarytimestamp"] # citeTable[key]["artsecondarywikitext"] # on error they are blank # function getSecondaryRevTime(key, jsona,i,cont,unixprimary,command,j,arrevid,artimestamp,arcontinue,a,maxrevs,prevcontinue) { citeTable[key]["artsecondaryrevid"] = "" citeTable[key]["artsecondarytimestamp"] = "" citeTable[key]["artsecondarywikitext"] = "" if(! empty(citeTable[key]["artprimarytimestamp"])) unixPrimary = unixTime(citeTable[key]["artprimarytimestamp"]) else return maxrevs = 20 i = 0 cont = 1 while(cont) { i++ if(i > maxrevs) { sendlog(logdir "syslog", wikiname, "Error: Exceeded " maxrevs " API requests in getSecondaryRevTime()") break # sanity break } if(empty(arcontinue["continue"])) command = "wget -q -O- " shquote("https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=" urlencodeawk(citeTable[key]["artsecondary"]) "&rvlimit=50&rvslots=main&rvprop=timestamp%7Cuser%7Cids&rvdir=older&format=json") else command = "wget -q -O- " shquote("https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=" urlencodeawk(citeTable[key]["artsecondary"]) "&rvlimit=50&rvslots=main&rvprop=timestamp%7Cuser%7Cids&rvdir=older&format=json&rvcontinue=" urlencodeawk(arcontinue["continue"]) ) if(query_json(sys2var(command), jsona) >= 0) { #debug #awkenough_dump(jsona, "jsona") # jsona["query","pages","1196634","revisions","1","revid"]=7741763 splitja(jsona, arrevid, 5, "revid") # jsona["query","pages","1196634","revisions","1","timestamp"]=2004-11-22T05:21:18Z splitja(jsona, artimestamp, 5, "timestamp") # jsona["continue","rvcontinue"]=20041122055516|7769047 splitja(jsona, arcontinue, 1, "rvcontinue") for(j = 1; j <= length(arrevid); j++) { if(unixTime(artimestamp[j]) <= unixPrimary) { if(unixTime(artimestamp[j]) == unixPrimary && citeTable[key]["artprimary"] == citeTable[key]["artsecondary"]) { # same article same diff get prev if(j == 50) break # size of block requested in API j++ } delete a tup(getwikisource(citeTable[key]["artsecondary"], "dontfollow", "wikipedia.org", "en", arrevid[j]), a) if(a[1] != "REDIRECT" && ! empty(a[1])) { citeTable[key]["artsecondaryrevid"] = arrevid[j] citeTable[key]["artsecondarytimestamp"] = artimestamp[j] citeTable[key]["artsecondarywikitext"] = a[1] cont = 0 break } else { if( empty(a[1]) && empty(a[2]) ) { # revision is empty sendlog(logdir "syslog", wikiname, "Error: empty secondary revision (" citeTable[key]["artsecondary"] "): " artimestamp[j]) } cont = 0 break } } } prevcontinue = arcontinue["continue"] } } } # # Get article title (follow redirects) # function getTitle(key1,key2, dest,a) { citeTable[key1][key2] = "" if(key2 == "artsecondary") { if(match(key1, /^[[][^#]+[^#]/, dest) > 0) { sub(/^[[]{2}/, "", dest[0]) tup(getwikisource(dest[0], "dontfollow", "wikipedia.org", "en"), a) if(a[1] == "REDIRECT") { gsub(/^#REDIRECT[ ]*[[]{2}|[]]$/, "", a[2]) citeTable[key1][key2] = a[2] } else citeTable[key1][key2] = dest[0] } } else if(key2 = "artprimary") { tup(getwikisource(wikiname, "dontfollow", "wikipedia.org", "en"), a) if(a[1] == "REDIRECT") { gsub(/^#REDIRECT [[]{2}|[]]$/, "", a[2]) citeTable[key1][key2] = a[2] } else citeTable[key1][key2] = wikiname } } # # What time and revision was the bug added to the primary article? # populates: # citeTable[key]["artprimaryrevid"] # citeTable[key]["artprimarytimestamp"] # on error they are blank # function getPrimaryRevTime(key, jsona,i,cont,command,arrevid,atimestamp,arcontinue,j,a,prevrevid,prevtimestamp,maxrevs,prevcontinue,jsonin) { citeTable[key]["artprimaryrevid"] = "" citeTable[key]["artprimarytimestamp"] = "" maxrevs = 20 i = 0 cont = 1 while(cont) { i++ if(i > maxrevs) { sendlog(logdir "syslog", wikiname, "Error: Exceeded " maxrevs " API requests in getPrimaryRevTime()") break # sanity break } if(empty(arcontinue["continue"])) command = "wget -q -O- " shquote("https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=" urlencodeawk(citeTable[key]["artprimary"]) "&rvlimit=50&rvslots=main&rvprop=timestamp%7Cuser%7Cids&rvdir=older&format=json") else command = "wget -q -O- " shquote("https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=" urlencodeawk(citeTable[key]["artprimary"]) "&rvlimit=50&rvslots=main&rvprop=timestamp%7Cuser%7Cids&rvdir=older&format=json&rvcontinue=" urlencodeawk(arcontinue["continue"]) ) jsonin = sys2var(command) if(query_json(jsonin, jsona) >= 0) { #debug #awkenough_dump(jsona, "jsona") # jsona["query","pages","1196634","revisions","1","revid"]=7741763 splitja(jsona, arrevid, 5, "revid") # jsona["query","pages","1196634","revisions","1","timestamp"]=2004-11-22T05:21:18Z splitja(jsona, artimestamp, 5, "timestamp") # jsona["continue","rvcontinue"]=20041122055516|7769047 splitja(jsona, arcontinue, 1, "rvcontinue") if(arcontinue["continue"] == prevcontinue) { # reached last revision j = length(arrevid) delete a tup(getwikisource(citeTable[key]["artprimary"], "dontfollow", "wikipedia.org", "en", arrevid[j]), a) if(a[1] != "REDIRECT" && ! empty(a[1])) { citeTable[key]["artprimaryrevid"] = arrevid[j] citeTable[key]["artprimarytimestamp"] = artimestamp[j] citeTable[key]["artprimarywikitext"] = a[1] cont = 0 break } else { sendlog(logdir "syslog", wikiname, "Error: empty primary revision: " artimestamp[j]) cont = 0 break } } for(j = 1; j <= length(arrevid); j++) { # step through each revision for this batch tup(getwikisource(citeTable[key]["artprimary"], "dontfollow", "wikipedia.org", "en", arrevid[j]), a) if(a[1] != "REDIRECT" && ! empty(a[1])) { # if(countsubstring(a[1], citeTable[key]["decoded"]) == 0) { if(countsubstring(a[1], origTable[key]) == 0) { if(! empty(prevrevid)) { citeTable[key]["artprimaryrevid"] = prevrevid citeTable[key]["artprimarytimestamp"] = prevtimestamp } else { citeTable[key]["artprimaryrevid"] = arrevid[j] citeTable[key]["artprimarytimestamp"] = artimestamp[j] } cont = 0 break } else { prevrevid = arrevid[j] prevtimestamp = artimestamp[j] } } else { if( empty(a[1]) && empty(a[2]) ) { # revision is empty if( citeTable[key]["artprimary"] != citeTable[key]["secondary"] ) { citeTable[key]["artprimaryrevid"] = arrevid[j] citeTable[key]["artprimarytimestamp"] = artimestamp[j] } else sendlog(logdir "syslog", wikiname, "Error: empty primary revision: " artimestamp[j]) cont = 0 break } } } prevcontinue = arcontinue["continue"] } } } # # Given a Wikipedia datestring, return unix-time string (seconds since 1970) # function unixTime(s) { return sys2var("date --date=" shquote(s) " +%s") } # # Return todays date as "August 2019" # function getDateToday() { return sys2var("date +\"%B %Y\"") } # # Determine article date type {{set dmy dates}} etc # imported from medilibrary.nim # function setdatetype(art, reDmy,reMdy,i,a) { reDmy = "[{]{2}" ReSpace "use" ReSpace "dmy" ReSpace "d?a?t?e?s?|[{]{2}" ReSpace "dmy" ReSpace "[|]|[{]{2}" ReSpace "dmy" ReSpace "[}]|[{]{2}" "use[ -]?dmy" reMdy = "[{]{2}" ReSpace "use" ReSpace "mdy" ReSpace "d?a?t?e?s?|[{]{2}" ReSpace "mdy" ReSpace "[|]|[{]{2}" ReSpace "mdy" ReSpace "[}]|[{]{2}" "use[ -]?mdy" for(i = 1; i <= splitn(art, a, i); i++) { if(a[i] ~ reDmy) return "{{dmy}}" if(a[i] ~ reMdy) return "{{mdy}}" } return "" } # # Abort check # function abort(key1, key2) { if( empty( strip(citeTable[key1][key2]) ) ) { if(key2 == "citesecondaryplaintext") { sendlog(logdir "logabort", wikiname, key1 " ---- " citeTable[key1]["citesecondaryplaintext"] " ---- " key2 " missing") } else { sendlog(logdir "logabort", wikiname, key1 " ---- " key2 " missing") } return 1 } return 0 } # # Reverse string # function reverse(s, i,len,a,r) { len = split(s, a, "") for(i = 1; i <= len; i++) r = a[i] r return r } function saveTable() { # Save debuggung info to table.out in data directory printtable = 1 if(printtable) { for(kk in citeTable) { print "\n -------------- \n" >> DataDir "table.out" print "origTable[" kk "] = " origTable[kk] >> DataDir "table.out" for(ll in citeTable[kk]) { if(ll != "artsecondarywikitext") print "citeTable[" kk "][" ll "] = " citeTable[kk][ll] >> DataDir "table.out" else { print "citeTable[" kk "][" ll "] = " length(citeTable[kk][ll]) >> DataDir "table.out" } } } } }