Jump to content

User:BacDiveBot

From Wikipedia, the free encyclopedia

Task 1

[edit]

Adds the {{taxonbar}} template to English Wikipedia articles about bacteria that do not have it yet. Permission: Wikipedia:Bots/Requests for approval/BacDiveBot

import csv
import pywikibot as pwb
from pywikibot import pagegenerators as pg
import sys

# SPARQL query: selects every Wikidata item that has a P2946 statement
# (presumably the BacDive ID property, going by the variable name — confirm)
# and that is the subject of at least one page whose language is "en".
# NOTE(review): schema:inLanguage "en" presumably also matches English
# sister projects, not only en.wikipedia — confirm. iterate_pages handles
# items for which no en-wiki sitelink can be fetched.
bacdive_query = """
SELECT ?item ?any
WHERE
{
    ?item wdt:P2946 ?any .
    FILTER EXISTS {
      ?enWiki schema:about ?item .
      ?enWiki schema:inLanguage "en" .
    }
}
"""

# wd_site: the Wikidata repository the query generator is bound to.
# wiki_site: the English Wikipedia whose articles are inspected and edited.
wd_site = pwb.Site("wikidata", "wikidata")
wiki_site = pwb.Site("en", "wikipedia")
# Page generator built from the query above; iterated by iterate_pages.
generator = pg.WikidataSPARQLPageGenerator(bacdive_query, wd_site)

def insert_template(page):
    """
    Add ``{{taxonbar}}`` to a page's wikitext and save the page.

    The template is inserted immediately before the first category link.
    If the page has no category link, it is appended at the end of the text.

    This function only edits and saves; deciding whether the page needs the
    template, and whether the run should stop afterwards, is the caller's job.

    :param page: page object providing ``get()`` (returns the wikitext),
        a writable ``text`` attribute and ``save(summary=...)``
    """
    # Function-scope import so the block does not depend on a file-level one.
    import re

    text = page.get()

    # Category links are matched case-insensitively and with optional
    # whitespace, so "[[category:...]]" is recognised as well. A leading
    # colon ("[[:Category:...]]", a plain link) is deliberately not matched.
    category = re.search(r"\[\[\s*Category\s*:", text, flags=re.IGNORECASE)

    if category:
        cut = category.start()
        page.text = text[:cut] + "{{taxonbar}}\n\n" + text[cut:]
    else:
        page.text = text + "\n\n{{taxonbar}}"

    page.save(summary="Adding taxonbar template")

def find_template(page):
    """
    Report whether a page's wikitext already uses the taxonbar template.

    MediaWiki treats the first letter of a template name as case-insensitive
    and underscores as spaces, so ``{{Taxonbar}}`` and ``{{taxonbar}}`` name
    the same template. Names are normalised accordingly before comparing;
    an exact comparison would miss ``{{Taxonbar}}`` and lead to a duplicate
    template being inserted.

    :param page: page object whose ``text`` attribute holds the wikitext
    :return: True if a taxonbar template is present, otherwise False
    """

    page_text = page.text

    tmpl_list = pwb.textlib.extract_templates_and_params(page_text)

    for tmpl in tmpl_list:
        print(tmpl[0])
        # tmpl[0] is the template name; the remaining element(s) hold params.
        name = tmpl[0].strip().replace("_", " ")
        if name[:1].lower() + name[1:] == "taxonbar":
            print("Taxonbar found")
            return True

    return False

def iterate_pages(csvfile, writer):
    """
    Walk the query results, report taxonbar status and add a missing taxonbar.

    For every item yielded by the module-level ``generator`` the English
    label (or the item ID when there is none) and the en-wiki sitelink are
    looked up. If the linked article has no taxonbar, the template is
    inserted and the loop stops, so at most one page is edited per run; that
    page is not written to the CSV. Every other item gets one CSV row.

    :param csvfile: open file object behind ``writer``; flushed after each row
    :param writer: ``csv.DictWriter`` accepting the keys ``taxon-name``,
        ``wikidata-id``, ``en-wiki-link`` and ``taxobox``
    """
    enwikitotal = 0
    taxobox = 0

    for item in generator:
        item_dict = item.get()
        template_found = False

        # Fall back to the item ID when the item has no English label.
        try:
            title = item_dict["labels"]["en"]
        except KeyError:
            title = item.id

        print("===={}====".format(title))

        # Any failure to fetch the sitelink is treated as "no en-wiki page".
        # `except Exception` (not a bare except) keeps KeyboardInterrupt and
        # SystemExit able to stop the bot.
        try:
            link = item.getSitelink(wiki_site)
        except Exception:
            link = None
            print("No en-wiki sitelink")

        if link:
            page = pwb.Page(wiki_site, link)
            template_found = find_template(page)

            if not template_found:
                insert_template(page)
                # One edit per run: stop as soon as a page has been changed.
                break

        writer.writerow({"taxon-name": title, "wikidata-id": item.id,
                         "en-wiki-link": link, "taxobox": template_found})
        # Flush per row so the report survives an interrupted run.
        csvfile.flush()

        if link:
            enwikitotal += 1
        if template_found:
            taxobox += 1

        print(link, taxobox)

    # Report the totals that were gathered for the rows written above.
    print("en-wiki pages: {}, with taxonbar: {}".format(enwikitotal, taxobox))

# Script entry: write the report to current.csv while iterating the pages.
# newline='' is what the csv module expects for files it writes (otherwise
# extra blank lines appear on platforms that translate "\n"), and an explicit
# UTF-8 encoding keeps non-ASCII taxon names from failing under other locale
# default encodings.
with open('current.csv', 'w', newline='', encoding='utf-8') as csvfile:
    header = ["taxon-name", "wikidata-id", "en-wiki-link", "taxobox"]
    writer = csv.DictWriter(csvfile, delimiter=",", fieldnames=header)
    writer.writeheader()
    iterate_pages(csvfile, writer)