User:Finnusertop/reviewsourcecheck.js: Difference between revisions

Content deleted Content added
Inline
Latest revision as of 17:33, 25 February 2020

// __NOINDEX__
//if (window.checkLinksToCitations === undefined) {
//    window.checkLinksToCitations = true;
//}
/**
 * Source-review helper for article pages.
 *
 * Once the DOM is ready, three independent checks run over the rendered page
 * and append <strong class=error|warning> notes next to whatever they flag:
 *
 *   1. Short-footnote links: "p." used with a page range / "pp." used with a
 *      single page, and hyphens used in page ranges.
 *   2. COinS metadata spans (class "Z3988"): missing author first names,
 *      publisher, location, date, ISBN or other identifier.
 *   3. Bibliography sections named in the table of contents: entries that are
 *      out of alphabetical order, duplicated, or lacking an id.
 *
 * Nothing is returned; the only effect is the markup appended to the page.
 */
jQuery(document).ready(function($) {
    // Prefer mw.config over the deprecated bare wgPageName global, but fall
    // back to the global so the script still works where only that exists.
    var mwConfig = window.mw && window.mw.config;
    var pageName = String((mwConfig ? mwConfig.get('wgPageName') : window.wgPageName) || '');

    /**
     * Escape text taken from the page before it is concatenated into HTML,
     * so that literal "<", "&" etc. in a citation cannot inject markup.
     * @param {*} text value to escape (coerced to string)
     * @returns {string} HTML-safe text
     */
    function escapeHtml(text) {
        return String(text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;');
    }

    /**
     * Append an HTML fragment to the end of an element.
     * NOTE: assigning innerHTML re-creates the element's children, so callers
     * must not hold on to references to those children across a call.
     * @param {Element} el element that receives the fragment
     * @param {string} html markup to append
     */
    function appendHtml(el, html) {
        el.innerHTML += html;
    }

    /**
     * @param {string} text
     * @returns {number} number of commas in text
     */
    function countCommas(text) {
        return (text.match(/,/g) || []).length;
    }

    /**
     * Build "#Heading_text" anchors from the table-of-contents entries,
     * in reverse TOC order (the sort check walks sections bottom-up).
     * @returns {string[]} anchors, last TOC entry first
     */
    function collectTocAnchors() {
        var tocEntries = document.getElementsByClassName("toctext");
        var anchors = [];
        for (var z = 0; z < tocEntries.length; z++) {
            // Every space becomes an underscore; replacing only the first one
            // would never match multi-word headings such as "Citations and notes".
            anchors.push("#" + tocEntries[z].innerText.replace(/ /g, "_"));
        }
        anchors.reverse();
        return anchors;
    }

    /**
     * First check: page-number notation in short citations.
     * Looks at the text around every link on the page, split on ";" so that
     * bundled citations are examined one at a time.
     */
    function checkPageRanges() {
        var hyphenNote = " <strong class=error> Hyphen in pg. range;  </strong>";
        var links = document.links; // live collection, re-indexed after each append

        for (var i = 0; i < links.length; i++) {
            var href = links[i].getAttribute('href');
            var parent = links[i].parentNode;
            var srctxt = parent.textContent;

            // Multi-source footnotes (sfnm) put several links in one parent.
            // If the previous link is a sibling, this parent was already
            // examined; skipping avoids duplicating the notes.
            var index = Array.prototype.indexOf.call(parent.children, links[i - 1]);
            if (index >= 0) {
                continue;
            }

            var isHarvLink = href.indexOf('#CITEREF') === 0;
            var pieces = srctxt.split(";");

            for (var k = 0; k < pieces.length; k++) {
                var piece = pieces[k];
                var commacount = countCommas(piece);
                var pos;
                var temptxt;

                if (piece.indexOf('pp.') > 0) {
                    piece = piece.replace("Harv error: link to", "            ");

                    // Only the 12 characters from "pp." on are inspected for a
                    // hyphen, to stay clear of unrelated text later in the note.
                    pos = piece.indexOf('pp.');
                    temptxt = piece.substring(pos, pos + 12);

                    if (temptxt.indexOf('-') > 0) {
                        appendHtml(parent, hyphenNote);
                    }

                    // "pp." with no range/list marker anywhere in the piece.
                    // commacount < 2 tolerates the comma in "Author 1984, pp. 9".
                    if (isHarvLink &&
                        (piece.indexOf('–') < 0) &&
                        (piece.indexOf('&') < 0) &&
                        (commacount < 2) &&
                        (piece.indexOf('-') < 0) &&
                        (piece.indexOf(' and ') < 0) &&
                        (piece.indexOf('&ndash;') < 0)) {
                        appendHtml(parent,
                            " <strong class=error> P/PP error? " +
                            escapeHtml(temptxt) + "; </strong>");
                    }
                } else if (isHarvLink && (piece.indexOf(' p.') > 0)) {
                    piece = piece.replace("Harv error: link to", "            ");
                    pos = piece.indexOf(' p.');
                    temptxt = piece.substring(pos, pos + 12);

                    var looksLikeRange =
                        (temptxt.indexOf('–') > 0) ||
                        (countCommas(temptxt) > 0) ||
                        (temptxt.indexOf('-') > 0) ||
                        (temptxt.indexOf('&ndash;') > 0);

                    // Commas that introduce a note or a nested citation are not
                    // pagination: "p. 23, note 7", "p. 23, n. 7", "p. 23, citing ...".
                    var isNoteOrCiting =
                        (temptxt.indexOf(', not') >= 0) ||
                        (temptxt.indexOf(', n.') >= 0) ||
                        (temptxt.indexOf(', cit') >= 0);

                    if (looksLikeRange && !isNoteOrCiting) {
                        appendHtml(parent,
                            " <strong class=error> P/PP error? " +
                            escapeHtml(temptxt) + "; </strong>");
                    }
                    if (temptxt.indexOf('-') > 0) {
                        appendHtml(parent, hyphenNote);
                    }
                }
            }
        }
    }

    /**
     * Second check: completeness of citation metadata, read from the
     * URL-encoded title attribute of each COinS span (class "Z3988") and,
     * where the metadata is silent, from the visible citation text.
     */
    function checkCitationMetadata() {
        var spantitles = document.getElementsByClassName("Z3988"); // live collection
        var withLocs = false;
        var withoutLocs = false;
        var withLocsCnt = 0;
        var withoutLocsCnt = 0;

        for (var i = 0; i < spantitles.length; i++) {
            // The parent element and the title string survive appendHtml();
            // the span itself does not, so it is never cached.
            var parent = spantitles[i].parentNode;
            var title = spantitles[i].title;
            var srctxt = parent.textContent;

            if ((srctxt.indexOf(". In ") > 0) &&
                (srctxt.indexOf(" pp.") < 0) &&
                (srctxt.indexOf(" p.") < 0)) {
                appendHtml(parent,
                    " <strong class=error> Caution: Missing pagenums for book chapter? </strong>");
            }

            // Each "rft.au=" value is one author; a "+" (encoded space) is
            // taken as evidence that a first name is present.
            var authors = title.split("rft.au=");
            for (var k = 1; k < authors.length; k++) {
                if ((title.indexOf("rft.au=") > 0) && (srctxt.indexOf("et al.") < 0)) {
                    if (authors[k].indexOf("+") < 0) {
                        appendHtml(parent,
                            " <strong class=error> Missing first name for: <u>" +
                            escapeHtml(authors[k].split("&")[0]) +
                            "</u>; </strong>");
                    }
                }
            }

            if (title.indexOf("rft.genre=book") > 0) {
                var lacksPlace =
                    (title.indexOf("rft.place") < 0) &&
                    (srctxt.indexOf("Oxford University Press") < 0) &&
                    (srctxt.indexOf("Cambridge University Press") < 0);

                if (lacksPlace) {
                    withoutLocs = true;
                    withoutLocsCnt += 1;
                } else {
                    withLocs = true;
                    withLocsCnt += 1;
                }

                // Once both styles have been seen, every further book is
                // flagged with the running counts; the underline marks which
                // style the current entry uses.
                if (withLocs && withoutLocs) {
                    if (lacksPlace) {
                        appendHtml(parent,
                            "<strong class=error> Inconsistent use of Publisher Location (" +
                            withLocsCnt + " with; " + withoutLocsCnt + " <u>without</u>); </strong>");
                    } else {
                        appendHtml(parent,
                            "<strong class=error> Inconsistent use of Publisher Location (" +
                            withLocsCnt + " <u>with;</u> " + withoutLocsCnt + " without); </strong>");
                    }
                }

                if (title.indexOf("rft.pub") < 0) {
                    appendHtml(parent,
                        " <strong class=error> Missing Publisher; </strong>");
                }

                var datePos = title.indexOf("rft.date");
                if (datePos > 0) {
                    // Four characters after "rft.date="; a non-numeric value
                    // becomes NaN and therefore falls into the "before 1970" branch.
                    var year = Number(title.slice(datePos + 9, datePos + 13));
                    if (year >= 1970) {
                        if ((title.indexOf("rft.isbn") < 0) &&
                            (srctxt.indexOf(" doi:") < 0) &&
                            (srctxt.indexOf("LCCN") < 0) &&
                            (srctxt.indexOf("OCLC") < 0) &&
                            (srctxt.indexOf("ISSN") < 0) &&
                            (srctxt.indexOf("MR") < 0)) {
                            appendHtml(parent,
                                " <strong class=error> Missing ISBN; </strong>");
                        }
                    } else {
                        // ") [" is a kluge: the metadata does not say whether
                        // orig-year is set, but the rendered text then reads
                        // "(pubdate) [origdate]".
                        if ((title.indexOf("rft.isbn") > 0) && (srctxt.indexOf(") [") < 0)) {
                            appendHtml(parent,
                                " <strong class=error> Pub. too early for ISBN, perhaps needs {{para|orig-year}}; </strong>");
                        }
                        if ((srctxt.indexOf("LCCN") < 0) &&
                            (srctxt.indexOf("OCLC") < 0) &&
                            (srctxt.indexOf("ISSN") < 0)) {
                            appendHtml(parent,
                                " <strong class=error> Missing Identifier/control number, e.g. OCLC; </strong>");
                        }
                    }
                } else {
                    appendHtml(parent,
                        " <strong class=error> Missing Year/Date; </strong>");
                }
            }

            // Disabled check, kept for reference (archive link / access date):
            //   if ((title.indexOf("http") > 0) && (title.indexOf("rft.genre=book") < 0)) {
            //       if (srctxt.indexOf("Archived") < 0) {
            //           appendHtml(parent, " <strong class=error> Missing archive link; </strong>");
            //           if ((srctxt.indexOf("Retrieved") < 0) && (title.indexOf("rft.date") < 0)) {
            //               appendHtml(parent, " <strong class=error> Missing access date; </strong>");
            //           }
            //       }
            //   }
        }
    }

    /**
     * Reduce the visible text of a citation to a short sort key:
     * leading quote and article removed, text cut after the first ")" with the
     * year moved to the front of the parenthesis, at most five words kept.
     * @param {string} text visible text of the citation (non-empty)
     * @param {string} previousAuthor key fragment of the preceding entry, used
     *        when this entry starts with an em dash (author-mask)
     * @returns {string} sort key
     */
    function buildSortKey(text, previousAuthor) {
        var key = text.trim();
        var articles = ["A ", "An ", "The "];

        for (var a = 0; a < articles.length; a++) {
            if (key.slice(0, articles[a].length) === articles[a]) {
                key = key.slice(articles[a].length);
                key = key.charAt(0).toUpperCase() + key.slice(1);
            }
        }

        // for example, in |author-mask={{long dash}}
        if (key.charAt(0) === "—") {
            key = previousAuthor + key;
        }
        key = key.replace('"', '');

        var closeParen = key.indexOf(")");
        if (closeParen > 0) {
            key = key.substring(0, closeParen) + ")";

            // handle (April 2006) or (04-11-2006) or anything not (YYYY):
            // move the year to the front of the parenthesis so dates compare by year.
            var yearMatch = /\d{4}/.exec(key);
            if (yearMatch !== null) {
                key = key.replace(yearMatch[0], "");
                key = key.replace("(", "(" + yearMatch[0] + "-");
            }
            key = key.replace(" )", ")");
        }

        if ((key.match(/\s/g) || []).length > 5) {
            key = key.substring(0, key.split(" ", 5).join(" ").length);
        }
        return key;
    }

    /**
     * Third check: alphabetical order within bibliography sections.
     * Sections are visited in reverse TOC order and citations already handled
     * are skipped, so an <h2> that merely wraps <h3> subsections does not
     * re-check (and re-flag) the entries of its subsections.
     * @param {string[]} tocAnchors anchors from collectTocAnchors()
     */
    function checkBibliographyOrder(tocAnchors) {
        var allRefheaders = ["#Books", "#Journals", "#Biographies", "#Bibliography", "#References", "#Citations_and_notes", "#Literature_cited", "#Works_cited", "#Book_sources", "#Primary_sources", "#Secondary_sources", "#Sources", "#Specialized_studies", "#Primary_sources,_available_in_English"];
        var refheaders = [];

        for (var d = 0; d < tocAnchors.length; d++) {
            if (allRefheaders.indexOf(tocAnchors[d]) > -1) {
                refheaders.push(tocAnchors[d]);
            }
        }

        var alreadySorted = [];
        for (var r = 0; r < refheaders.length; r++) {
            // Look the heading up by id rather than as a selector: an id that
            // contains a comma would otherwise be parsed as a selector list.
            var heading = document.getElementById(refheaders[r].slice(1));
            var refsection = $(heading).parent().next();
            var newcites = refsection.find('.citation');

            var sortedCites = [];
            var unsortedCites = [];
            var sortIndices = [];
            var oldAuth = '';

            for (var h = 0; h < newcites.length; h++) {
                var cite = newcites[h];

                if (alreadySorted.indexOf(cite) > -1) {
                    continue;
                }

                // Only entries with no id, or with a CITEREF id, are list entries.
                var citeId = cite.getAttribute('id');
                if (citeId && citeId.indexOf('CITEREF') !== 0) {
                    continue;
                }

                // Full citations that sit inside a footnote are not list entries.
                var parentid = cite.parentNode.parentNode.getAttribute('id');
                if (parentid && parentid.indexOf('cite_note') > -1) {
                    continue;
                }

                // Read the text before any note is appended, so the note
                // cannot leak into the sort key or mask an empty citation.
                var citeText = cite.innerText.replace('"', '');

                if (!citeId) {
                    appendHtml(cite,
                        " <strong class=warning> Caution: Missing <i>ref=</i> anchor?; </strong>");
                }

                if (citeText === '') {
                    continue;
                }

                var sortKey = buildSortKey(citeText, oldAuth);

                // Remembered for a following author-masked entry:
                // parentheses, braces and em dashes are dropped.
                oldAuth = sortKey.replace(/[{()}]/g, '').replace(/—/g, "").trim();

                if (unsortedCites.indexOf(sortKey) > -1) {
                    appendHtml(cite,
                        " <strong class=warning> Warning: duplicate author/date: " +
                        escapeHtml(sortKey) + "; </strong>");
                }
                sortedCites.push(sortKey);
                unsortedCites.push(sortKey);
                sortIndices.push(h);
                alreadySorted.push(cite);
            }

            sortedCites.sort(Intl.Collator().compare);

            // Any position where page order and sorted order disagree is
            // flagged with the key that was expected at that position.
            for (var p = 0; p < unsortedCites.length; p++) {
                if (unsortedCites[p] !== sortedCites[p]) {
                    appendHtml(newcites[sortIndices[p]],
                        " <strong class=warning> Sort error, expected: <u>" +
                        escapeHtml(sortedCites[p]) + "</u>; </strong>");
                }
            }
        }
    }

    // Skip talk pages (any namespace, hence the case-insensitive test),
    // special pages and project pages.
    if ((pageName.toLowerCase().indexOf('talk:') >= 0) ||
        (pageName.indexOf('Special:') >= 0) ||
        (pageName.indexOf('Wikipedia:') >= 0)) {
        return;
    }

    // The TOC is read before any note is appended to the page.
    var tocAnchors = collectTocAnchors();

    checkPageRanges();
    checkCitationMetadata();
    checkBibliographyOrder(tocAnchors);
});