# User:ZackBot/model cleanup
# Appearance
#!/usr/bin/env ruby
# encoding: utf-8
require 'mediawiki_api'
require 'HTTParty'
require 'csv'
require 'open-uri'
require './helper'
require 'fileutils'
# Checks that +regex+ matches +text+ exactly once.
#
# Prints an error line naming +param+ when the pattern is missing or occurs
# more than once.
#
# @param text [String] text to search
# @param param [String] human-readable label used in the error message
# @param regex [Regexp] pattern whose matches are counted
# @return [Boolean] true when there is exactly one match, false otherwise
def exactly_one_time(text, param, regex)
  occurrences = text.scan(regex).length
  return true if occurrences == 1

  if occurrences.zero?
    puts "- ERROR: '#{param}' does not appear on the page"
  else
    puts "- ERROR: '#{param}' appears more than one time on the page."
  end
  false
end
# Infobox parameter names that the script strips from each model infobox.
# Frozen so the shared list cannot be modified by accident at runtime.
PARAMS = [
  "bust", "chest", "collar",
  "dress_size", "dress size", "dresssize",
  "hips", "measurements", "suit",
  "shoe_size", "shoe size", "shoesize",
  "waist",
  "weight", "weight_kg", "weight_st", "weight_lb", "weight_lbs"
].freeze

# Matches one complete infobox template call, from its opening "{{" to the
# matching "}}". The leading lookahead restricts matches to the
# "Infobox model", "Infobox male model" and "Infobox Playboy Cyber Girl"
# templates; the recursive group (\g<1>) lets nested {{...}} templates inside
# the infobox be consumed as part of the match.
INFOBOX_REGEX = /(?=\{\{[Ii]nfobox\s(?:[Mm]odel|male\smodel|Playboy\sCyber\s[Gg]irl))(\{\{(?>[^{}]++|\g<1>)*}})/

# Wikitext link to the tracking category; interpolated into the edit summary.
CATEGORY = "[[Category:Pages using infobox model with deprecated parameters|deprecated parameters]]".freeze

# PetScan query (JSON output) that supplies the titles of the pages to process.
QUERY_URL = "https://petscan.wmflabs.org/?psid=618857&format=json".freeze
# Main script: load credentials, log in to the wiki API, fetch the list of
# pages from the PetScan query, and strip the deprecated parameters from the
# model infobox on each page.
Helper.read_env_vars
client = MediawikiApi::Client.new 'https://en.wikipedia.org/w/api.php'
client.log_in ENV['USERNAME'], ENV['PASSWORD']

# Kernel#open no longer opens URLs on Ruby 3.0+, so go through URI.open
# (provided by open-uri). The block form closes the response stream once it
# has been read. JSON.parse is used instead of JSON.load because the payload
# comes from a remote service.
json = JSON.parse(URI.open(QUERY_URL, &:read))
titles = json["*"].first["a"]["*"].map { |page| page["title"].tr("_", " ") }
puts titles.size

# One pattern per deprecated parameter, compiled once instead of once per page.
# Each pattern removes "| <param> = ..." through the end of that line.
# NOTE(review): this assumes one parameter per line; anything that follows the
# parameter on the same line is removed with it.
param_patterns = PARAMS.map { |param| /\|\s*#{Regexp.escape(param)}\s*=.*\n/ }

# For testing
# pages = File.open('test.txt').read
# pages.each_line do |title|
titles.each do |title|
  title.strip!
  puts title
  full_text = client.get_wikitext(title).body
  next if Helper.no_bots?(full_text)
  next unless exactly_one_time(full_text, "Infobox Model", INFOBOX_REGEX)

  infobox_text = full_text[INFOBOX_REGEX]
  cleaned_infobox = param_patterns.reduce(infobox_text) do |text, pattern|
    text.gsub(pattern, "")
  end

  # Block form on purpose: with a String replacement, sequences such as "\1"
  # or "\\" inside the infobox text would be interpreted as back-references
  # and corrupt the page. The guard above guarantees exactly one infobox, so
  # a single substitution is sufficient.
  updated_text = full_text.sub(INFOBOX_REGEX) { cleaned_infobox }

  client.edit(title: title, text: updated_text, summary: "Fixing infobox not to use #{CATEGORY}")
  puts "- SUCCESS"
end
puts "DONE"