User:Dr pda/generatestats.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page. |
Documentation for this user script can be added at User:Dr pda/generatestats. |
//<pre>
//This script generates a list of the ten shortest and ten longest articles which transclude a template,
//e.g. {{featured article}}, calculates some statistics and plots a histogram.
//To use this function add {{subst:js|User:Dr pda/generatestats.js}} to your monobook.js
//then go to http://en.wikipedia.org/w/index.php?title=User:Dr_pda/generatestats&action=edit
//See the talk page for documentation.
/**
 * Constructor for a minimal record holding one key and one value.
 * Must be invoked with `new`; the two arguments are stored unchanged
 * on the new instance as `key` and `value`.
 *
 * @param {*} key   Stored as `this.key`.
 * @param {*} value Stored as `this.value`.
 */
function keyValuePair(key, value) {
  var record = this;
  record.key = key;
  record.value = value;
}
/**
 * Comparator for Array.prototype.sort that orders records ascending by
 * their `value` property. The subtraction coerces both values to numbers.
 *
 * @param {{value: *}} a First record.
 * @param {{value: *}} b Second record.
 * @returns {number} Negative when a sorts first, positive when b sorts
 *   first, zero when both values are numerically equal.
 */
function sortByValue(a, b) {
  var difference = a.value - b.value;
  return difference;
}
/**
 * Picks an axis increment for the range [min, max]: the smallest entry of a
 * fixed table of "round" step sizes that is at least one fifteenth of the
 * range. Falls back to 5000 when no table entry is large enough (or when
 * the range is not a number).
 *
 * @param {number} min Lower end of the range.
 * @param {number} max Upper end of the range.
 * @returns {number} The chosen step size.
 */
function getBestScale(min, max) {
  // Declared locally: the table used to be assigned without `var`, which
  // leaked a global named `scales` and throws in strict mode.
  var scales = [0.2, 0.5, 1, 2, 5, 10, 20, 25, 50, 100, 200, 250, 500, 1000, 2000, 5000];
  var val = (max - min) / 15;
  // Index loop rather than for...in so that enumerable properties added to
  // Array.prototype by other scripts are never treated as table entries.
  for (var i = 0; i < scales.length; i++) {
    if (scales[i] >= val) {
      return scales[i];
    }
  }
  return 5000;
}
/**
 * Starts an asynchronous GET request for `url` and forwards every
 * ready-state change to `handler`, together with a caller-supplied value.
 *
 * Uses the native XMLHttpRequest when `window` exposes it, otherwise the
 * "Microsoft.XMLHTTP" ActiveX object. If neither is available the function
 * does nothing.
 *
 * @param {string} url        Address to request.
 * @param {function} handler  Called as handler(request, template) on each
 *                            readystatechange event.
 * @param {*} template        Passed through to `handler` unchanged.
 */
function loadXMLDocPassingTemplate(url, handler, template) {
  var request;

  if (window.XMLHttpRequest) {
    // Native implementation
    request = new XMLHttpRequest();
  } else if (window.ActiveXObject) {
    // IE/Windows ActiveX implementation
    request = new ActiveXObject("Microsoft.XMLHTTP");
  }

  if (!request) {
    return;
  }

  request.onreadystatechange = function () {
    handler(request, template);
  };
  request.open("GET", url, true);
  request.send("");
}
/**
 * readystatechange handler for the API query that returns page sizes.
 *
 * Once the request has completed successfully it:
 *  - decrements the global `jobsLeft` counter when running in talk-category
 *    or template-category mode,
 *  - appends one keyValuePair(title, length) per <page> element to the
 *    global `pagesList` (advancing the global `index`),
 *  - requests the next batch when the response carries an <embeddedin>
 *    element with a `geicontinue` attribute,
 *  - otherwise, when no jobs are outstanding, either builds the chart from
 *    the wikitext sizes or (URL contains "prosesize") requests the rendered
 *    HTML of every collected page to measure readable prose.
 *
 * A response without any <page> element is ignored after the counter
 * update, as before.
 *
 * NOTE(review): continuation is only detected through an <embeddedin>
 * element; confirm this matches the continuation format the API currently
 * returns. In template-category mode `queryURL` ends in "geititle=", so the
 * continuation request does not carry the template title - verify.
 *
 * @param {XMLHttpRequest} req Request whose state changed.
 * @param {*} template         Passed through to follow-up requests.
 */
function getSizeFromAPI(req, template) {
  // Only act once the request has finished loading.
  if (req.readyState !== 4) {
    return;
  }
  if (req.status !== 200) {
    alert("There was a problem retrieving the XML data:\n" +
      req.statusText);
    return;
  }

  if (useTalkCategory || useTemplateCategory) jobsLeft--;

  var response = req.responseXML.documentElement;
  var pages = response.getElementsByTagName('page');
  if (pages.length === 0) {
    return;
  }

  for (var i = 0; i < pages.length; i++) {
    pagesList[index++] = new keyValuePair(pages[i].getAttribute('title'), pages[i].getAttribute('length'));
  }
  document.getElementById('wpTextbox1').value = 'Retrieved ' + index + ' articles.\n To abort click the back button in your browser.';

  // Check for more pages
  var embeddedin = response.getElementsByTagName('embeddedin');
  if (embeddedin.length > 0) {
    var geicontinue = embeddedin[0].getAttribute('geicontinue');
    if (useTalkCategory || useTemplateCategory) jobsLeft++;
    // The continuation token can contain characters such as "|" or "&",
    // so it has to be escaped before it is placed in the query string.
    loadXMLDocPassingTemplate(queryURL + '&geicontinue=' + encodeURIComponent(geicontinue), getSizeFromAPI, template);
    return;
  }

  // Other size queries are still in flight; the last one to finish
  // starts the processing.
  if (jobsLeft !== 0) {
    return;
  }

  // Using wiki text size
  if (document.location.href.indexOf('prosesize') === -1) {
    sortAndMakeChart();
    return;
  }

  // Using readable prose size (WARNING: loads every page which transcludes
  // the template. Could be thousands of pages!!)
  // Index loop instead of for...in: enumerable properties that other
  // scripts add to Array.prototype must not be treated as pages.
  for (var p = 0; p < pagesList.length; p++) {
    var title = pagesList[p].key;
    var titleURL = encodeURIComponent(title.replace(/ /g, '_'));
    loadXMLDocPassingTemplate('/w/index.php?action=render&title=' + titleURL, getProseSizeFromPage, title);
  }
}
/**
 * readystatechange handler for the talk-page category query.
 *
 * Collects the `subjectid` attribute of every <page> element into the
 * global `articleList`. While the response carries a <categorymembers>
 * element with a `gcmcontinue` attribute, the next batch of category
 * members is requested. Once the category is exhausted, the collected ids
 * are sent to `queryURL` in groups of at most 50, one size query per group,
 * incrementing the global `jobsLeft` for each query started.
 *
 * A response without any <page> element is ignored, as before.
 *
 * @param {XMLHttpRequest} req Request whose state changed.
 * @param {*} template         Passed through to follow-up requests.
 */
function getArticlePageFromTalkPage(req, template) {
  // Only act once the request has finished loading.
  if (req.readyState !== 4) {
    return;
  }
  if (req.status !== 200) {
    alert("There was a problem retrieving the XML data:\n" +
      req.statusText);
    return;
  }

  var response = req.responseXML.documentElement;
  var pages = response.getElementsByTagName('page');
  if (pages.length === 0) {
    return;
  }

  for (var i = 0; i < pages.length; i++) {
    var subjectId = pages[i].getAttribute('subjectid');
    // getAttribute returns null when the attribute is absent (presumably a
    // talk page whose subject page does not exist - TODO confirm). Pushing
    // it would put the literal text "null" into the pageids parameter.
    if (subjectId) {
      articleList.push(subjectId);
    }
  }

  var categorymembers = response.getElementsByTagName('categorymembers');
  if (categorymembers.length > 0) {
    var gcmcontinue = categorymembers[0].getAttribute('gcmcontinue');
    // Escape the continuation token before placing it in the query string.
    loadXMLDocPassingTemplate(talkQueryURL + '&gcmcontinue=' + encodeURIComponent(gcmcontinue), getArticlePageFromTalkPage, template);
    return;
  }

  // All pages retrieved. API limited to 50 ids per query, so send the list
  // in slices of 50 (explicit slicing instead of for...in string keys).
  var BATCH_SIZE = 50;
  for (var start = 0; start < articleList.length; start += BATCH_SIZE) {
    var pageIds = articleList.slice(start, start + BATCH_SIZE).join('|');
    jobsLeft++;
    loadXMLDocPassingTemplate(queryURL + pageIds, getSizeFromAPI, template);
  }
}
/**
 * readystatechange handler for the template-category query.
 *
 * Appends the URL-encoded title of every <page> element to the global
 * `articleList`. While the response carries a <categorymembers> element
 * with a `gcmcontinue` attribute, the next batch of category members is
 * requested. Once the category is exhausted, one size query is started per
 * collected title (title appended to `queryURL`), incrementing the global
 * `jobsLeft` for each.
 *
 * A response without any <page> element is ignored, as before.
 *
 * @param {XMLHttpRequest} req Request whose state changed.
 * @param {*} template         Passed through to follow-up requests.
 */
function getPagesFromTemplateCategory(req, template) {
  // Only act once the request has finished loading.
  if (req.readyState !== 4) {
    return;
  }
  if (req.status !== 200) {
    alert("There was a problem retrieving the XML data:\n" +
      req.statusText);
    return;
  }

  var response = req.responseXML.documentElement;
  var pages = response.getElementsByTagName('page');
  if (pages.length === 0) {
    return;
  }

  for (var i = 0; i < pages.length; i++) {
    articleList.push(encodeURIComponent(pages[i].getAttribute('title')));
  }

  var categorymembers = response.getElementsByTagName('categorymembers');
  if (categorymembers.length > 0) {
    var gcmcontinue = categorymembers[0].getAttribute('gcmcontinue');
    // Escape the continuation token before placing it in the query string.
    loadXMLDocPassingTemplate(templateQueryURL + '&gcmcontinue=' + encodeURIComponent(gcmcontinue), getPagesFromTemplateCategory, template);
    return;
  }

  // All pages retrieved. API embeddedin query can only take one title, so
  // each template gets its own request. Index loop instead of for...in so
  // enumerable Array.prototype additions are not mistaken for titles.
  for (var t = 0; t < articleList.length; t++) {
    jobsLeft++;
    loadXMLDocPassingTemplate(queryURL + articleList[t], getSizeFromAPI, template);
  }
}
/**
 * readystatechange handler for a rendered-page request; measures the
 * "readable prose" of the page.
 *
 * The size is the summed length of the text between each literal "<p>" and
 * the following "</p>", after removing bracketed numbers of one to three
 * digits (e.g. "[12]"), the phrase "citation needed", and all HTML tags.
 * The result is appended to the global `proseList` as
 * keyValuePair(title, size). When as many prose sizes as pages have been
 * collected (`proseIndex == index`), `pagesList` is replaced by
 * `proseList` and the chart is generated.
 *
 * @param {XMLHttpRequest} req Request whose state changed.
 * @param {string} title       Title of the page that was requested.
 */
function getProseSizeFromPage(req, title) {
  // Only act once the request has finished loading.
  if (req.readyState !== 4) {
    return;
  }
  if (req.status !== 200) {
    alert("There was a problem retrieving the XML data:\n" +
      req.statusText);
    return;
  }

  var response = req.responseText;
  var proseSize = 0;
  var start = response.indexOf('<p>');
  while (start > -1) {
    var stop = response.indexOf('</p>', start);
    if (stop === -1) {
      // Unterminated paragraph. Continuing would restart the search from
      // the beginning of the text (a negative offset is treated as 0) and
      // never terminate, so the dangling paragraph is left uncounted.
      break;
    }
    var para = response.substring(start + 3, stop);
    para = para.replace(/\[\d{1,3}\]/g, '');
    para = para.replace(/citation needed/g, '');
    para = para.replace(/(<([^>]+)>)/ig, '');
    proseSize += para.length;
    start = response.indexOf('<p>', stop);
  }

  proseList[proseIndex++] = new keyValuePair(title, proseSize);
  document.getElementById('wpTextbox1').value = 'Retrieved prose size for ' + proseIndex + ' out of ' + index + ' articles.\n To abort click the back button in your browser.';

  // If last page retrieved then start processing
  if (proseIndex == index) {
    pagesList = proseList;
    sortAndMakeChart();
  }
}
/**
 * Turns the collected sizes in the global `pagesList` into wikitext and
 * triggers a preview.
 *
 * Produces, in order: the (up to) ten longest articles, the (up to) ten
 * shortest articles, statistics (count, mean, median in kB), a <timeline>
 * histogram, and - when the URL contains "&list" - the full list sorted by
 * descending size. The text is written to the 'wpTextbox1' element and the
 * 'wpPreview' element is clicked.
 *
 * Side effect: `pagesList` is left sorted by descending size.
 *
 * When `pagesList` is empty a short notice is written to the edit box and
 * no preview is triggered.
 */
function sortAndMakeChart() {
  var textbox = document.getElementById('wpTextbox1');
  var count = pagesList.length;
  if (count === 0) {
    // Nothing to rank or plot; every later step indexes into the list.
    textbox.value = 'No articles found.';
    return;
  }

  // One wikitext list item: "# [[Title]] (N kB)"
  function formatEntry(page) {
    return '# [[' + page.key + ']] (' + Math.round(page.value / 1024) + ' kB)\n';
  }

  var i;
  // The "top ten" lists shrink when fewer than ten pages were found.
  var listLimit = Math.min(10, count);

  pagesList.sort(sortByValue);

  //Get top ten and bottom ten
  var bottomTen = '===Ten shortest articles===\n';
  for (i = 0; i < listLimit; i++) {
    bottomTen += formatEntry(pagesList[i]);
  }

  // Median of the ascending list: middle element for an odd count, mean
  // of the two middle elements for an even count. Number() because sizes
  // read from XML attributes are strings.
  var middle = Math.floor(count / 2);
  var medianBytes = (count % 2 === 1)
    ? Number(pagesList[middle].value)
    : (Number(pagesList[middle - 1].value) + Number(pagesList[middle].value)) / 2;

  pagesList.reverse();
  var topTen = '===Ten longest articles===\n';
  for (i = 0; i < listLimit; i++) {
    topTen += formatEntry(pagesList[i]);
  }

  var showList = document.location.href.indexOf('&list') != -1;
  var list = '===List of articles by size===\n';
  if (showList) {
    for (i = 0; i < count; i++) {
      list += formatEntry(pagesList[i]);
    }
  }

  //Get Range
  var max = Math.ceil(pagesList[0].value / 1024);
  var min = Math.floor(pagesList[count - 1].value / 1024);
  var xScale = getBestScale(min, max);
  max = Math.ceil(max / xScale) * xScale;
  min = Math.floor(min / xScale) * xScale;
  // Rounded because fractional scales can make the quotient slightly
  // non-integral (which `new Array` rejects); at least one bin so that a
  // zero-width range (all sizes equal) still produces a bar.
  var numBins = Math.max(1, Math.round((max - min) / xScale));

  //Calculate statistics
  var sum = 0.0;
  var bins = new Array(numBins);
  for (i = 0; i < numBins; i++) {
    bins[i] = 0;
  }
  for (i = 0; i < count; i++) {
    sum += pagesList[i].value * 1.0;
    var bin = Math.floor((pagesList[i].value / 1024 - min) / (xScale * 1.0));
    // A size exactly on the upper bound would land one past the last bin;
    // clamp so every page is counted in an existing bin.
    bin = Math.min(numBins - 1, Math.max(0, bin));
    bins[bin]++;
  }
  var mean = (sum / (count * 1024)).toFixed(3);
  var median = (medianBytes / 1024).toFixed(3);
  var statistics = '===Statistics===\n*Number of articles: ' + count + '\n*Mean: ' + mean + ' kB\n*Median: ' + median + ' kB\n';

  //Calculate best vertical scale
  var yMax = Math.max.apply(Math, bins);
  var yScale = getBestScale(0, yMax);
  yScale = Math.max(1, yScale);
  yMax = Math.ceil(yMax / yScale) * yScale;
  var verticalScale = '\nScaleMajor = gridcolor:darkgrey increment:' + yScale + ' start:0';
  // Minor grid lines only when half a major step is a whole number.
  if (Math.floor(yScale / 2) == yScale / 2) verticalScale += '\nScaleMinor = gridcolor:lightgrey increment:' + yScale / 2 + ' start:0';

  //Draw chart
  var chart = '===Chart===\n<timeline>\nColors=\n id:lightgrey value:gray(0.8)\n id:darkgrey value:gray(0.8)\n id:white value:rgb(1,1,1)\n id:steel value:rgb(0.6,0.7,0.8)\n\nImageSize = width:auto height:303 barincrement:25\nPlotArea = left:50 bottom:50 top:30 right:30\nDateFormat = x.y\nPeriod = from:0 till:' + yMax + '\nTimeAxis = orientation:vertical\nAlignBars = early' + verticalScale + '\nBackgroundColors = canvas:white\n\nPlotData=\n color:steel width:20 align:left\n';
  for (i = 0; i < numBins; i++) {
    chart += ' bar:' + (min + i * xScale) + ' from:0 till:' + bins[i] + '\n';
  }
  //Add axis label
  chart += ' bar:' + (min + Math.floor(2 * numBins / 5) * xScale) + ' at:0 text:"Article size in kB" shift:(0,-30)\n\n</timeline>';

  var output = topTen + '\n' + bottomTen + '\n' + statistics + '\n' + chart;
  if (showList) {
    output += '\n' + list;
  }
  textbox.value = output;
  document.getElementById('wpPreview').click();
}
/**
 * Entry point: resets the script's shared state, asks the user what to
 * analyse and starts the first API request.
 *
 * The mode is chosen from the page URL:
 *  - contains "usetalkcategory": prompt for a talk page category; its
 *    members are resolved to their subject pages, whose sizes are queried.
 *  - contains "usetemplatecategory": prompt for a template category; pages
 *    embedding each template in it are queried.
 *  - otherwise: prompt for a single template; pages embedding it are
 *    queried, restricted to namespace 0 unless the URL contains
 *    "specifynamespace", in which case the namespace is prompted for first.
 *
 * Shared state is stored as properties of `window` (pagesList, index,
 * proseList, proseIndex, articleList, template, queryURL, talkQueryURL,
 * templateQueryURL, jobsLeft, namespace, useTalkCategory,
 * useTemplateCategory, specifyNamespace) because the request handlers read
 * and update these names as globals.
 *
 * If the user cancels a prompt, the function returns without starting a
 * request.
 */
function generateStatistics() {
  var href = document.location.href;

  // Prefix + name with its first character upper-cased.
  function buildTitle(prefix, name) {
    return prefix + name.toUpperCase().substr(0, 1) + name.substr(1);
  }

  // Explicit window properties instead of undeclared assignments: same
  // globals for the handlers, but valid in strict mode as well.
  window.pagesList = [];
  window.index = 0;
  window.proseList = [];
  window.proseIndex = 0;
  window.articleList = [];
  window.template = '';
  window.queryURL = '';
  window.talkQueryURL = '';
  window.templateQueryURL = '';
  window.jobsLeft = 0;
  window.namespace = '0';
  window.useTalkCategory = href.indexOf('usetalkcategory') != -1;
  window.useTemplateCategory = href.indexOf('usetemplatecategory') != -1;
  window.specifyNamespace = href.indexOf('specifynamespace') != -1;

  var answer;

  if (window.specifyNamespace) {
    answer = prompt("Enter the number of the namespace the pages are in\n (0=article, 2=User, 4=Wikipedia etc)", "");
    // prompt() yields null when the dialog is cancelled.
    if (answer === null) {
      return;
    }
    window.namespace = answer;
  }

  if (window.useTalkCategory) {
    answer = prompt("Enter the talk page category you want to check for\n (Don't include Category:)", "");
    if (answer === null) {
      return;
    }
    window.template = buildTitle("Category:", answer);
    // Titles are user input and may contain "&", "+", "#", spaces, ... so
    // they are escaped before being placed in the query string.
    window.talkQueryURL = '/w/api.php?action=query&generator=categorymembers&gcmtitle=' + encodeURIComponent(window.template) + '&gcmlimit=500&gcmnamespace=1&prop=info&inprop=subjectid&format=xml';
    window.queryURL = '/w/api.php?action=query&prop=info&format=xml&pageids=';
    loadXMLDocPassingTemplate(window.talkQueryURL, getArticlePageFromTalkPage, window.template);
  }
  else if (window.useTemplateCategory) {
    answer = prompt("Enter the template category you want to check\n (Don't include Category:)", "");
    if (answer === null) {
      return;
    }
    window.template = buildTitle("Category:", answer);
    window.templateQueryURL = '/w/api.php?action=query&generator=categorymembers&gcmtitle=' + encodeURIComponent(window.template) + '&gcmlimit=500&gcmnamespace=10&prop=info&format=xml';
    // Ends in "geititle=": the handler appends one encoded title per request.
    window.queryURL = '/w/api.php?action=query&generator=embeddedin&geilimit=500&geinamespace=0&prop=info&format=xml&geititle=';
    loadXMLDocPassingTemplate(window.templateQueryURL, getPagesFromTemplateCategory, window.template);
  }
  else {
    answer = prompt("Enter the template you want to check for\n (Don't include Template:)", "");
    if (answer === null) {
      return;
    }
    window.template = buildTitle("Template:", answer);
    window.queryURL = '/w/api.php?action=query&generator=embeddedin&geititle=' + encodeURIComponent(window.template) + '&geilimit=500&geinamespace=' + encodeURIComponent(window.namespace) + '&prop=info&format=xml';
    loadXMLDocPassingTemplate(window.queryURL, getSizeFromAPI, window.template);
  }

  document.getElementById('wpTextbox1').value = 'Started.';
}
// Start the generator once the page is ready, but only on the dedicated
// edit page of this script.
(function () {
  function runOnStatsPage() {
    if (document.location.href.indexOf('User:Dr_pda/generatestats&action=edit') != -1) {
      generateStatistics();
    }
  }

  if (typeof addOnloadHook === 'function') {
    // Legacy MediaWiki helper; still used when the wiki provides it.
    addOnloadHook(runOnStatsPage);
  } else if (document.readyState === 'loading') {
    // Helper unavailable: wait for the DOM ourselves.
    document.addEventListener('DOMContentLoaded', runOnStatsPage);
  } else {
    // DOM already parsed (scripts are often loaded late), run immediately.
    runOnStatsPage();
  }
})();
//</pre>