User:ChristieBot/GA.py
''' Copyright (c) 2022 Mike Christie

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
'''
import urllib.parse
import re
import datetime
import pywikibot
pywikibot.config.max_retries=2
import pymysql
import operator
import GA_config
import sys
from dateutil.parser import parse

class Topic:

    @classmethod
    def initialize(cls):
        topics = {}
        subtopics = {}

        topic = Topic("Agriculture, food and drink")
        subtopic = Subtopic("Agriculture, food and drink", topic, ["FARM","FOOD"], 'This includes agriculture and farming, horticulture and forestry, restaurants, cuisines, food, drink, food and drink companies, food and drink people, and cookery books.', 'Tango icon nature.svg')
        topic.add_subtopic(subtopic)
        topics[topic.name] = topic
        subtopics[subtopic.name] = subtopic

        topic = Topic("Art and architecture")
        subtopic = Subtopic("Art and architecture", topic, ["ART","ARCH"], 'This includes art, architecture, religious architecture, artists, architects, museums, and galleries.','Nuvola apps package graphics.svg')
        topic.add_subtopic(subtopic)
        topics[topic.name] = topic
        subtopics[subtopic.name] = subtopic

        topic = Topic("Engineering and technology")
        subtopic = Subtopic("Computing and engineering", topic, ['COMP','ENG'], 'This includes computer-related businesses and businesspeople, cryptography, engineers and inventors, engineering technology, engineering failures and disasters, hardware, standards and protocols, programming, software, websites, and the Internet.','Nuvola apps display.png')
        topic.add_subtopic(subtopic)
        topics[topic.name] = topic
        subtopics[subtopic.name] = subtopic

        subtopic = Subtopic('Transport',topic,['TRANS'],'This includes air transport, maritime transport, rail transport, rail bridges/tunnels/stations, trains and locomotives, road infrastructure, road transportation and policy, and transport by region.','Nuvola apps ksysv.png')
        topic.add_subtopic(subtopic)
        topics[topic.name] = topic
        subtopics[subtopic.name] = subtopic

        topic = Topic("Geography and places")
        subtopic = Subtopic("Geography", topic, ["GEO"], 'This includes bodies of water and water formations, geographers and explorers, human geography, islands, landforms, national and state parks, nature reserves, conservation areas, countryside routes, and urban/historical sites. 
Note: This does not include urban public parks, which are added to the Recreation subtopic.', 'Gnome-globe.svg') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Places", topic, ["PLACE"], 'This includes countries, states, counties, cities, neighborhoods, and other political designations in Africa, Antarctica, Asia, Australia and the Pacific, Europe, Middle East, North America, and South America.', 'P countries-vector.svg') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("History") subtopic = Subtopic("Royalty, nobility and heraldry", topic, ["ROYAL"], 'This includes flags and heraldry, historical monarchs, royalty, and nobility.', 'Azure-Cross-Or-Heraldry.svg') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("World history", topic, ["HIST"], 'This includes archaeology and archaeologists, historians and chroniclers, history books, historical heads of state and government, historical politicians, historical figures, African history, North American history, South American history, Asian history, Australian and Oceania history, European history, Middle Eastern history, and global history. Note: This does not include historical royalty or nobility, who are added to the Royalty, nobility and heraldry subtopic.', 'Greek deity head icon.png') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("Language and literature") subtopic = Subtopic("Language and literature", topic, ["LANG","LIT"], "This includes alphabets and transliteration, ancient texts, biographies, autobiographies, essays, diaries, and travelogues, characters and fictional items, children's stories, fairy tales, and nursery rhymes, comics, literary genres, literary theory, languages, nonfiction, novels, plays, poetry, short fiction/anthologies, words and linguistics, writers, publishers, and critics.", 'Nuvola apps fonts.svg') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("Mathematics") subtopic = Subtopic("Mathematics and mathematicians", topic, ["MATH","MATHS"], 'This includes mathematics, mathematical problems, and mathematicians.', 'Nuvola apps edu mathematics-p.svg') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("Media and drama") subtopic = Subtopic("Film", topic, ["FILM"], 'This includes film overview articles, film franchises, and film titles. 
Note: Articles on films that have not yet been released are not eligible to be nominated, as details within the article will change after the film''s release.', 'Film reel.svg') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Media and drama", topic, ["STAGE"], 'This includes people in the media (including actors, directors, models, performers, and celebrities), fictional characters and technologies, animation, cinema, radio, theatre, musical theatre, dance, and opera.', 'Drama-icon.svg') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Television", topic, ["TV"], 'This includes television overview articles, television networks, television series, television episodes and specials, and television characters.', 'TV-icon-2.svg') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("Music") subtopic = Subtopic("Albums", topic, ["ALBUM"], 'This includes record albums, soundtracks, and video albums.', 'CD icon test.svg') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Songs", topic, ["SONG"], 'This includes songs from any era.', 'Song icon.png') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Other music articles", topic, ["MUS"], 'This includes music awards, music by nation/people/region/country, music genres, music styles, music eras, musical theory, musical instruments, music techniques, music businesses and events, music compositions, performers, groups, composers, and other music people.', 'Saxophone-icon.svg') topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("Natural sciences") subtopic = Subtopic("Biology and medicine", topic, ["BIO","MED"], "This includes biology (including biologists, biology books, and evolution/reproduction), medicine (including medicine books, diseases/medical conditions, history of medicine, medical people/institutions, and medical procedures), pharmacology (including vaccines and drug classes), viruses, and organisms (including bacterial species, protists, fungi, plants, and animals such as mammals, birds, dinosaurs, reptiles/amphibians, fish, arthropods, other invertebrates, and domestic animals).","DNA icon.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Chemistry and materials science", topic, ["CHEM","MATSCI"], "This includes chemistry theory, chemistry books, types of chemical analyses, types of chemical transformations, named reactions, chemical compounds and materials, chemical substructures and groups, elements, chemistry and materials science organizations, and chemists/materials scientists.", "Nuvola apps edu science.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Earth sciences", topic, ["EARTH","CLIMATE","WEATHER"], "This includes geology (including geologists, geophysics and geophysicists, mineralogy and mineralogists, and earthquakes) and meteorology (meteorological observatories, storm sciences, tropical cyclones, tropical cyclone seasons, storm effects, weather, and winter storms), oceanography and climate sciences (current climate change and paleoclimate).", "Jordens inre.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic 
subtopics[subtopic.name] = subtopic subtopic = Subtopic("Physics and astronomy", topic, ["PHYS","ASTRO"], "This includes physics and physicists, astronomy and astronomers, astrophysics and astrophysicists, the solar system, constellations and asterisms, stars, galaxies, extrasolar objects, and rocketry/spaceflight.", "Nuvola apps katomic.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("Philosophy and religion") subtopic = Subtopic("Philosophy and religion", topic, ["PHIL","REL"], "This includes divinities and protohistoric figures, myths, mythology, and miracles, philosophies and philosophers, philosophical movements, philosophical doctrines/teachings/texts/symbols, religions and religious figures, religious movements, religious congregations and organizations, religious doctrines/teachings/texts/symbols. Note: This does not include religious buildings, which are added to the Art and architecture subtopic.", "Yin yang.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("Social sciences and society") subtopic = Subtopic("Culture, sociology and psychology", topic, ["CULTURE","SOC","SOCIO","PSYCH"], "This includes culture and cultural studies, cultural symbols, Internet culture, cultural organizations and events, ethnic groups, psychology and psychologists, anthropology and anthropologists, sociology and sociologists, and globalization.", "Nuvola apps kuser.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Education", topic, ["EDU"], "This includes education, educators, and educational institutions.", "Nuvola apps edu miscellaneous.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Economics and business", topic, ["ECON","BUS"], "This includes advertising and marketing, businesspeople, businesses and organizations, economics, and numismatics/currencies.", "Nuvola mimetypes kchart chrt.png") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Law", topic, ["LAW"], "This includes law cases, domestic law, constitutional law, international law, crime, criminals, punishment, victims, ethics, lawyers, judges/legal academics, and legal institutions/buildings.", "Icon-Rechtshinweis-blau2-Asio.png") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Magazines and print journalism", topic, ["MEDIA","PRINT"], "This includes journalism and journalists, newspapers, magazines, and journals.", "Canon EOS Rebel.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Politics and government", topic, ["POL","GOVT"], "This includes heads of state and government, intelligence and espionage, international organizations, political and governmental institutions, political districts, political direction and governance, political events and elections, political figures, political issues, political theory and analysis, and political parties/movements. 
Note: This does not include deceased politicians or former heads of state and government, who are added to the World history subtopic.", "Vote.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("Sports and recreation") subtopic = Subtopic("Football", topic, ["FOOTBALL"], "This includes association football (soccer), Australian rules football, Gaelic football, gridiron football (including American football, arena football, and Canadian football), international rules football, rugby league, rugby union, and historical forms of football.", "Soccerball.jpg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Other sports", topic, ["SPORT"], "This includes all sports other than football, including baseball (including baseball teams/events and baseball people), basketball (basketball teams/events and basketball people), cricket (including cricket teams/events and people), hockey (field and ice hockey teams/events/arenas and hockey people), pro wrestling (events, people, and organizations), cue sports (billiards, pool, snooker, and other cue sports), and other sports (including curling, cycling, equestrianism, golf, lacrosse, mixed martial arts/boxing, Motorsport, Olympics and Paralympics, rowing, running, track and field, skating, skiing, swimming/water sports, tennis, chess, sports mascots/supporters, and sports miscellanea).", "Baseball.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic subtopic = Subtopic("Recreation", topic, ["REC"], "This includes board/card/role-playing games, poker, toys, zoos, public parks, and amusements.", "Playing card heart 3.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("Video games") subtopic = Subtopic("Video games", topic, ["VG"], "This includes early video games, video game titles, video game series, video game characters, video game genres, video game systems/services, video game history and development, the video game industry, video game developers, video game terms, and game elements.", "WPVG icon 2016.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("Warfare") subtopic = Subtopic("Warfare", topic, ["WAR"], "This includes armies and military units, battles, exercises, and conflicts (from ancient history through the present), massacres, war crimes, and legal issues of warfare, military aircraft, military awards and decorations, military museums and memorials, military people, warships and naval units (including ship types, naval technology, and warships by country) weapons, equipment, and buildings (including military programs, uniforms, installations, and castles).", "Miecze.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic topic = Topic("Miscellaneous") subtopic = Subtopic("Miscellaneous", topic, ["MISC"], "This includes anything that does not fit into one of the topics above.", "Symbol dot dot dot gray.svg") topic.add_subtopic(subtopic) topics[topic.name] = topic subtopics[subtopic.name] = subtopic return [topics, subtopics] def __init__(self, name): self.name = name self.subtopics = [] def add_subtopic(self, subtopic): self.subtopics.append(subtopic) def header(self, target): header = "" if target == 'Wikipedia': header = "==" + self.name + "==\n" return header class Subtopic: # A dictionary to look up what 
subtopic corresponds to an abbreviated form. # Note that the subtopic name itself has to be in this dictionary -- e.g. subtopic_var_dict['agriculture, food and drink']='Agriculture, food and drink' # is necessary. This is because the nom __init__ checks for this. That may be removable but for now it's easier to leave it as is. subtopic_var_dict = {} subtopic_var_dict['agriculture, food and drink']='Agriculture, food and drink' subtopic_var_dict['agriculture']='Agriculture, food and drink' subtopic_var_dict['cuisine']='Agriculture, food and drink' subtopic_var_dict['cuisines']='Agriculture, food and drink' subtopic_var_dict['cultivation']='Agriculture, food and drink' subtopic_var_dict['drink']='Agriculture, food and drink' subtopic_var_dict['farming and cultivation']='Agriculture, food and drink' subtopic_var_dict['farming']='Agriculture, food and drink' subtopic_var_dict['food and drink']='Agriculture, food and drink' subtopic_var_dict['food']='Agriculture, food and drink' subtopic_var_dict['art and architecture']='Art and architecture' subtopic_var_dict['art']='Art and architecture' subtopic_var_dict['architecture']='Art and architecture' subtopic_var_dict['engineering and technology']='Computing and engineering' subtopic_var_dict['engtech']='Computing and engineering' subtopic_var_dict['applied sciences and technology']='Computing and engineering' subtopic_var_dict['applied sciences']='Computing and engineering' subtopic_var_dict['computers']='Computing and engineering' subtopic_var_dict['computing and engineering']='Computing and engineering' subtopic_var_dict['computing']='Computing and engineering' subtopic_var_dict['eng']='Computing and engineering' subtopic_var_dict['engineering']='Computing and engineering' subtopic_var_dict['technology']='Computing and engineering' subtopic_var_dict['transport']='Transport' subtopic_var_dict['geography and places']='Geography' subtopic_var_dict['geography']='Geography' subtopic_var_dict['places']='Places' subtopic_var_dict['history']='World history' subtopic_var_dict['archaeology']='World history' subtopic_var_dict['world history']='World history' subtopic_var_dict['royalty, nobility and heraldry']='Royalty, nobility and heraldry' subtopic_var_dict['heraldry']='Royalty, nobility and heraldry' subtopic_var_dict['nobility']='Royalty, nobility and heraldry' subtopic_var_dict['royalty']='Royalty, nobility and heraldry' subtopic_var_dict['language and literature']='Language and literature' subtopic_var_dict['langlit']='Language and literature' subtopic_var_dict['languages and linguistics']='Language and literature' subtopic_var_dict['languages and literature']='Language and literature' subtopic_var_dict['languages']='Language and literature' subtopic_var_dict['linguistics']='Language and literature' subtopic_var_dict['lit']='Language and literature' subtopic_var_dict['literature']='Language and literature' subtopic_var_dict['mathematics and mathematicians']='Mathematics and mathematicians' subtopic_var_dict['mathematics']='Mathematics and mathematicians' subtopic_var_dict['math']='Mathematics and mathematicians' subtopic_var_dict['maths']='Mathematics and mathematicians' subtopic_var_dict['media and drama']='Media and drama' subtopic_var_dict['drama']='Media and drama' subtopic_var_dict['ballet']='Media and drama' subtopic_var_dict['dance']='Media and drama' subtopic_var_dict['film']='Film' subtopic_var_dict['films']='Film' subtopic_var_dict['media']='Media and drama' subtopic_var_dict['opera']='Media and drama' 
subtopic_var_dict['television']='Television' subtopic_var_dict['theater']='Media and drama' subtopic_var_dict['theatre']='Media and drama' subtopic_var_dict['theatre, film and drama']='Media and drama' subtopic_var_dict['music']='Other music articles' subtopic_var_dict['classical compositions']='Other music articles' subtopic_var_dict['other music articles']='Other music articles' subtopic_var_dict['albums']='Albums' subtopic_var_dict['songs']='Songs' subtopic_var_dict['natural sciences']='Physics and astronomy' subtopic_var_dict['natsci']='Physics and astronomy' subtopic_var_dict['astronomy']='Physics and astronomy' subtopic_var_dict['astrophysics']='Physics and astronomy' subtopic_var_dict['cosmology']='Physics and astronomy' subtopic_var_dict['natural science']='Physics and astronomy' subtopic_var_dict['natural sciences']='Physics and astronomy' subtopic_var_dict['physics and astronomy']='Physics and astronomy' subtopic_var_dict['physics']='Physics and astronomy' subtopic_var_dict['biology and medicine']='Biology and medicine' subtopic_var_dict['biology']='Biology and medicine' subtopic_var_dict['medicine']='Biology and medicine' subtopic_var_dict['chemistry and materials science']='Chemistry and materials science' subtopic_var_dict['chemistry']='Chemistry and materials science' subtopic_var_dict['atmospheric science']='Earth sciences' subtopic_var_dict['earth science']='Earth sciences' subtopic_var_dict['earth sciences']='Earth sciences' subtopic_var_dict['geology']='Earth sciences' subtopic_var_dict['geophysics']='Earth sciences' subtopic_var_dict['meteorology and atmospheric science']='Earth sciences' subtopic_var_dict['mineralogy']='Earth sciences' subtopic_var_dict['meteorology']='Earth sciences' subtopic_var_dict['philosophy and religion']='Philosophy and religion' subtopic_var_dict['philrelig']='Philosophy and religion' subtopic_var_dict['mysticism']='Philosophy and religion' subtopic_var_dict['myth']='Philosophy and religion' subtopic_var_dict['mythology']='Philosophy and religion' subtopic_var_dict['phil']='Philosophy and religion' subtopic_var_dict['philosophy']='Philosophy and religion' subtopic_var_dict['relig']='Philosophy and religion' subtopic_var_dict['religion']='Philosophy and religion' subtopic_var_dict['religion, mysticism and mythology']='Philosophy and religion' subtopic_var_dict['culture, sociology and psychology']='Culture, sociology and psychology' subtopic_var_dict['culture, society and psychology']='Culture, sociology and psychology' subtopic_var_dict['culture, sociology, and psychology']='Culture, sociology and psychology' subtopic_var_dict['culture, society, and psychology']='Culture, sociology and psychology' subtopic_var_dict['social sciences and society']='Culture, sociology and psychology' subtopic_var_dict['socsci']='Culture, sociology and psychology' subtopic_var_dict['culture and society']='Culture, sociology and psychology' subtopic_var_dict['culture']='Culture, sociology and psychology' subtopic_var_dict['psychology']='Culture, sociology and psychology' subtopic_var_dict['social science']='Culture, sociology and psychology' subtopic_var_dict['social sciences']='Culture, sociology and psychology' subtopic_var_dict['society']='Culture, sociology and psychology' subtopic_var_dict['education']='Education' subtopic_var_dict['economics and business']='Economics and business' subtopic_var_dict['business and economics']='Economics and business' subtopic_var_dict['business']='Economics and business' subtopic_var_dict['economics']='Economics and business' 
subtopic_var_dict['law']='Law' subtopic_var_dict['journalism and media']='Magazines and print journalism' subtopic_var_dict['journalism']='Magazines and print journalism' subtopic_var_dict['magazines and print journalism']='Magazines and print journalism' subtopic_var_dict['media and journalism']='Magazines and print journalism' subtopic_var_dict['gov']='Politics and government' subtopic_var_dict['government']='Politics and government' subtopic_var_dict['politics and government']='Politics and government' subtopic_var_dict['politics']='Politics and government' subtopic_var_dict['sports and recreation']='Other sports' subtopic_var_dict['other sports']='Other sports' subtopic_var_dict['sports']='Other sports' subtopic_var_dict['everyday life']='Recreation' subtopic_var_dict['everydaylife']='Recreation' subtopic_var_dict['games']='Recreation' subtopic_var_dict['recreation']='Recreation' subtopic_var_dict['sports (other)']='Other sports' subtopic_var_dict['sport and recreation']='Other sports' subtopic_var_dict['sport']='Other sports' subtopic_var_dict['sports (football)']='Football' subtopic_var_dict['football']='Football' subtopic_var_dict['association football']='Football' subtopic_var_dict['soccer']='Football' subtopic_var_dict['american football']='Football' subtopic_var_dict['canadian football']='Football' subtopic_var_dict['gridiron football']='Football' subtopic_var_dict['gaelic football']='Football' subtopic_var_dict['australian football']='Football' subtopic_var_dict['australian rules football']='Football' subtopic_var_dict['rugby']='Football' subtopic_var_dict['rugby union']='Football' subtopic_var_dict['rugby league']='Football' #subtopic_var_dict['sports and recreation']='Sports and recreation' #subtopic_var_dict['sports']='Sports and recreation' #subtopic_var_dict['everyday life']='Sports and recreation' #subtopic_var_dict['everydaylife']='Sports and recreation' #subtopic_var_dict['games']='Sports and recreation' #subtopic_var_dict['recreation']='Sports and recreation' #subtopic_var_dict['sport and recreation']='Sports and recreation' #subtopic_var_dict['sport']='Sports and recreation' subtopic_var_dict['video games']='Video games' subtopic_var_dict['video and computer games']='Video games' subtopic_var_dict['warfare']='Warfare' subtopic_var_dict['war']='Warfare' subtopic_var_dict['aircraft']='Warfare' subtopic_var_dict['battles and exercises']='Warfare' subtopic_var_dict['battles']='Warfare' subtopic_var_dict['decorations and memorials']='Warfare' subtopic_var_dict['military']='Warfare' subtopic_var_dict['military people']='Warfare' subtopic_var_dict['units']='Warfare' subtopic_var_dict['war and military']='Warfare' subtopic_var_dict['warships']='Warfare' subtopic_var_dict['weapons and buildings']='Warfare' subtopic_var_dict['weapons']='Warfare' subtopic_var_dict['miscellaneous']='Miscellaneous' def __init__(self, name, topic, anchors, intro_text, icon_file): self.topic = topic self.name = name self.anchors = anchors self.intro_text = intro_text self.icon_file = icon_file self.nom_list = {} self.section_header_line = "=== " + self.name + " ===" self.icon_line = '[[File:' + self.icon_file + '|22px|left]]' self.anchor_line = '' self.shortcut_line = '' if len(anchors) > 0: self.shortcut_line = '{{shortcut' for a in anchors: self.anchor_line += '{{anchor|' + a + '}}' self.shortcut_line += '|WP:GAN#' + a self.shortcut_line += '}}' def add_nom(self, nom): self.nom_list[nom.title] = nom def section_header(self): header_rows = [ "", self.icon_line, self.anchor_line, "", 
self.section_header_line, self.shortcut_line, "", "::" + self.intro_text, "" ] return('\n'.join(header_rows)) class Nom: # Class variable dictionary to hold the status letter/status string relationship status_strings = { 'R' : 'Under review', '2' : '2nd opinion', 'H' : 'On hold', 'P' : 'Passed', 'F' : 'Failed', 'X' : 'Removed', '' : 'Start review' } status_template_strings = { 'R' : 'onreview', '2' : '2ndopinion', 'H' : 'onhold', '' : '' } @classmethod def is_a_GA(cls, page): # returns True if the article has the GA template on the talk page or the GA template their article page return ('Category:Wikipedia good articles' in [x.title() for x in page.categories()] or 'Category:Good articles' in [x.title() for x in page.categories()]) @classmethod def is_an_FA(cls, page): # returns True if the article is featured; works for both article and talk page return ('Category:Wikipedia featured articles' in [x.title() for x in page.categories()] or 'Category:Featured articles' in [x.title() for x in page.categories()]) @classmethod def is_an_FFA(cls, page): # returns True if the article is a former featured article; works for both article and talk page if page.title()[:5] == "Talk:": talk_page = page else: site = pywikibot.Site('en','wikipedia') talk_page = pywikibot.Page(site,"Talk:" + page.title()) return 'Category:Wikipedia former featured articles' in [x.title() for x in talk_page.categories()] def __init__(self, conn, topic, subtopic, title, status, page_num, nominator, nomination_ts, note, shortdesc): #GAN.log(conn, "Nom:init","Called with page:",page_num) if subtopic.lower() not in Subtopic.subtopic_var_dict.keys(): self.subtopic = 'Miscellaneous' else: self.subtopic = Subtopic.subtopic_var_dict[subtopic.lower()] self.topic = topic self.title = title self.status = status self.page_num = page_num self.nominator = nominator self.nomination_ts = nomination_ts self.note = note self.shortdesc = shortdesc self.nominator_reviews = 0 self.nominator_GAs = 0 self.nominator_edit_age = 0 # time in days since nominator's last edit self.reviewer_reviews = 0 self.reviewer_GAs = 0 self.reviewer_edit_age = 0 # time in days since reviewer's last edit self.nom_edits = 0 self.reviewer = None self.review_started_ts = None self.custom_sort_2 = "" self.edit_summary = "" self.status_template_string = "" self.warnings = [] self.get_review_info(title, page_num) self.hold_ts = None # Time when the nomination was placed on hold self.second_opinion_ts = None # Time when the nomination status was changed to second opinion requested if status not in Nom.status_strings.keys(): # Change an invalid status to waiting for review status = '' self.status_string = Nom.status_strings[status] if status not in Nom.status_template_strings.keys(): # Change an invalid status to waiting for review self.status_template_string = Nom.status_template_strings[''] else: self.status_template_string = Nom.status_template_strings[status] self.status_string = Nom.status_strings[status] if not self.review_page_exists and self.status not in ('X',''): self.warnings.append("Status indicates review has started but there is no review page") GA_config.current_errors.append("Status of [[" + self.title + "]] / " + str(self.page_num) + " is " + self.status + " indicating review has started but there is no review page\n") # Now we need to set the second opinion and hold timestamps, if they exist. These are not shown on the review or talk pages, so we have to get them from the active_nominations table. 
        if self.review_started_ts != None:
            active_nomination = Active_nomination.get_active_nomination(conn, title)
            if active_nomination == None:
                # If we're here, this is the first time we've seen this nomination, since it's not in the active_nominations stored during the last run.
                if self.status == 'H':
                    self.hold_ts = datetime.datetime.utcnow()
                if self.status == '2':
                    self.second_opinion_ts = datetime.datetime.utcnow()
            else:
                self.hold_ts = active_nomination['hold_ts']
                self.second_opinion_ts = active_nomination['second_opinion_ts']
        age_of_nom = datetime.datetime.utcnow() - self.nomination_ts
        self.age_in_days = age_of_nom.days
        self.age_in_seconds_without_days = age_of_nom.seconds
        self.R_over_G = 0
        self.R_plus_one_over_G = 0
        self.recalculate_RG()
        self.recalculate_R_plus_one_over_G()
        self.recalculate_custom_sort()

    def get_review_info(self, title, page_num):
        self.review_page_name = "Talk:" + title + "/GA" + str(page_num)
        self.reviewer = None
        self.review_started_ts = None
        self.review_page_exists = False
        site = pywikibot.Site('en','wikipedia')
        review_page = pywikibot.Page(site, self.review_page_name)
        try:
            self.review_page_exists = review_page.exists()
        except pywikibot.exceptions.ServerError as e:
            GAN.log(conn,"get_review_info: exceptions",title,"Server error testing existence of review page")
        if self.review_page_exists: # If it exists we still need to check if it's a redirect
            try:
                review_page.get()
            except pywikibot.exceptions.IsRedirectPageError as e: # If it's a redirect, try getting the target page instead
                review_page_redir_target = pywikibot.Page(site,e.title[2:-2])
                if review_page_redir_target.exists():
                    review_page = review_page_redir_target
                else: # Here the review page is a redirect but it doesn't redirect to an actual page
                    self.review_page_exists = False
        if self.review_page_exists:
            try:
                self.reviewer = review_page.oldest_revision['user']
                self.review_started_ts = review_page.oldest_revision['timestamp']
            except pywikibot.exceptions.ServerError as e:
                GAN.log(conn,"get_review_info: exceptions",title,"Server error retrieving oldest revision of review page")

    def update_timestamps(self, old_status):
        if self.status == 'H' and old_status in ['','2','R']:
            self.hold_ts = datetime.datetime.utcnow()
        if self.status == '2' and old_status in ['','H','R']:
            self.second_opinion_ts = datetime.datetime.utcnow()

    def add_GA_star(self, conn):
        site = pywikibot.Site('en','wikipedia')
        article_page = pywikibot.Page(site, self.title) # May need to eventually check that this is not a redirect, and go to the target if it is.
        ga_offset = article_page.text.find('{{good article}}') # Check if the star is already in the article
        if ga_offset == -1:
            if GA_config.is_live:
                # The star should be after the short description and DISPLAYTITLE if they exist. It should also be after any hatnotes but that is not currently implemented.
                sd_offset = 0
                sd_re = re.search('{{Short description[^}]*}}', article_page.text, re.IGNORECASE)
                if sd_re is not None:
                    sd_offset = sd_re.span()[1]
                DT_offset = 0
                DT_re = re.search('{{DISPLAYTITLE[^}]*}}', article_page.text)
                if DT_re is not None:
                    DT_offset = DT_re.span()[1]
                insert_offset = sd_offset
                if DT_offset > sd_offset:
                    insert_offset = DT_offset
                if article_page.text[insert_offset:insert_offset + 1] == "\n":
                    insert_offset += 1 # Step past the newline that follows the preceding template so the icon is inserted on its own line.
article_page.text = article_page.text[:insert_offset] + "{{good article}}\n" + article_page.text[insert_offset:] GAN.log(conn, "add_GA_star", self.title, "new article text is" + article_page.text[:200] + " ...") article_page.save("Add good article icon") article_page = pywikibot.Page(site, self.title) # Now the star is added, refresh the page since we want the newest revid for oldid oldid = article_page.latest_revision.revid talk_page = pywikibot.Page(site,'Talk:' + self.title) GAN_re = re.search('{{GA\|',talk_page.text) if GAN_re == None: # Check if it was added to article history -- if it was we don't have an error if talk_page.text.lower().find("{{article history") < 0 and talk_page.text.find("/GA" + str(self.page_num)) < 0: GAN.notify_error("Adding oldid","Searching for GA template","Can't find GA template or article history link in Talk:" + self.title) GAN.log(conn,"add_GA_star","Searching for GA template","Can't find GA template or article history link in Talk:" + self.title) return None GAN_start = GAN_re.span()[0] GAN_text = talk_page.text[GAN_start:] # Strips everything before the template GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA template text and nothing else if GAN_text.find("oldid") < 0: # Don't add oldid if it's already there new_GAN_text = GAN_text[0:-2] + "|oldid=" + str(oldid) + "}}" talk_page.text = talk_page.text[:GAN_re.span()[0]] + new_GAN_text + talk_page.text[GAN_re.span()[0] + len(GAN_text):] #print(talk_page.text) if GA_config.is_live: talk_page.save("Add oldid for good article") # UPDATE ON GO LIVE #else: #print("Not adding oldid to Talk:" + self.title) def update_users(self, conn, users, name_changes, review_stats, wbgan): #TODO if the namespace of a redirected user page is not user or user talk, ignore it #print("Called update_users:") #print("Self is " + str(self)) #print("Title is " + str(self.title)) #print("Nominator is " + str(self.nominator)) site = pywikibot.Site('en','wikipedia') if self.nominator not in users.keys(): self.nominator_reviews = review_stats.get_review_count(self.nominator, name_changes) self.nominator_GAs = WBGAN.get_GA_count(wbgan, self.nominator, name_changes) u = pywikibot.User(site,'User:' + self.nominator) target_user = GAN.get_link_redirect_target(conn, 'User:' + self.nominator) GAN.log(conn,"update_users", self.title, str(target_user)) if target_user is not None: GAN.log(conn,"update_users", self.title, "find offset for slash = " + str(target_user.title().find("/"))) if target_user is not None and target_user.title().find("/") < 0: # We don't want to follow the redirect if it has a slash because that's not a real user page u = pywikibot.User(site, target_user.title().replace(" talk","")) #print("User is " + str(u)) #print("Last edit is " + str(u.last_edit)) GAN.log(conn,"update_users", self.title, "nominator = " + self.nominator) GAN.log(conn,"update_users", self.title, "u = " + u.username) if target_user is None: GAN.log(conn,"update_users", self.title, "target_user = None") else: GAN.log(conn,"update_users", self.title, "target_user = " + target_user.title()) u_last_edit = None try: u_last_edit = u.last_edit except pywikibot.exceptions.ServerError as e: GAN.log(conn,"update_users:exceptions",link, "Pywikibot server exception " + str(e) + " when trying to get user last edit") u_last_edit = 'Error' if u_last_edit in [None, 'Error']: users[self.nominator] = { 'reviews': 0, 'GAs': 999, 'edit_age': self.nominator_edit_age } if u_last_edit == None: GA_config.current_errors.append("No user edits 
found for user " + str(u) + " -- user may have been renamed. See [[User:ChristieBot#What to do if your username changes]] for how to fix this.") else: ule = u.last_edit[2] ule_delta = datetime.datetime.utcnow() - ule self.nominator_edit_age = ule_delta.days users[self.nominator] = { 'reviews': self.nominator_reviews, 'GAs': self.nominator_GAs, 'edit_age': self.nominator_edit_age } self.nominator_reviews = users[self.nominator]['reviews'] self.nominator_GAs = users[self.nominator]['GAs'] self.nominator_edit_age = users[self.nominator]['edit_age'] if self.reviewer != None and self.reviewer not in users.keys() and self.reviewer != '': self.reviewer_reviews = review_stats.get_review_count(self.reviewer, name_changes) GAs = WBGAN.get_GA_count(wbgan, self.reviewer, name_changes) #print("About to look up reviewer <" + str(self.reviewer) + "> for <" + self.title + ">") ur = pywikibot.User(site,'User:' + str(self.reviewer)) ure = ur.last_edit[2] reviewer_delta = datetime.datetime.utcnow() - ure reviewer_edit_age = reviewer_delta.days users[self.reviewer] = { 'reviews': self.reviewer_reviews, 'GAs': GAs, 'edit_age': reviewer_edit_age } # The next calls may not be needed as I believe the recalc call in GANbot brings all this up to date. if self.reviewer != None and self.reviewer != '': self.reviewer_reviews = users[self.reviewer]['reviews'] self.reviewer_GAs = users[self.reviewer]['GAs'] self.reviewer_edit_age = users[self.reviewer]['edit_age'] self.recalculate_RG() self.recalculate_R_plus_one_over_G() self.recalculate_custom_sort() def print_GAN_entry(self): entry =[] #print("In print_GAN_entry for " + self.title + ", status = <" + self.status + ">") GAN_entry_1 = "# {{GANentry|1=" + self.title + "|2=" + str(self.page_num) + "|shortdesc=" + str(self.shortdesc) GAN_entry_2 = "}}" if self.status in ('H','R','2'): GAN_entry_2 = "|exists=yes}}" GAN_entry_3 = " (" + str(self.nominator_reviews) + " reviews, " if self.nominator_reviews == 1: GAN_entry_3 = " (1 review, " GAN_entry_4 = str(self.nominator_GAs) + " GAs) " if self.nominator_GAs == 1: GAN_entry_4 = "1 GA) " GAN_entry_5 = "[[User:" + self.nominator + "|" + self.nominator + "]] ([[User talk:" + self.nominator + "|talk]]) " + self.nomination_ts.strftime("%H:%M, %-d %B %Y (UTC)") entry.append(GAN_entry_1 + GAN_entry_2 + GAN_entry_3 + GAN_entry_4 + GAN_entry_5) if self.status in ('H','R','2'): display_ts = self.review_started_ts entry_status = '' if self.status == 'H': display_ts = self.hold_ts entry_status = '|status=on hold' elif self.status == '2': display_ts = self.second_opinion_ts entry_status = '|status=2nd opinion' display_ts_string = 'Unknown timestamp' if display_ts != None: display_ts_string = display_ts.strftime("%H:%M, %-d %B %Y (UTC)") GAR_entry_1 = "#:{{GAReview" + entry_status + "}} " GAR_entry_2 = "(" + str(self.reviewer_reviews) + " reviews, " if self.reviewer_reviews == 1: GAR_entry_2 = "(1 review, " GAR_entry_3 = str(self.reviewer_GAs) + " GAs) " if self.reviewer_GAs == 1: GAR_entry_3 = "1 GA) " #entry.append("#:{{GAReview" + entry_status + "}} (Reviews: " + str(self.reviewer_reviews) + ") [[User:" + self.reviewer + "|" + self.reviewer + "]] ([[User talk:" + self.reviewer + "|talk]]) " + display_ts_string) entry.append(GAR_entry_1 + GAR_entry_2 + GAR_entry_3 + "[[User:" + self.reviewer + "|" + self.reviewer + "]] ([[User talk:" + self.reviewer + "|talk]]) " + display_ts_string) if self.note != '': entry.append("#: '''Note:''' " + self.note) #print(self.title + " : " + str(self.nominator_edit_age) + " : " + 
str(self.reviewer_edit_age) + " : " + str(GA_config.inactivity_age)) if self.nominator_edit_age > GA_config.inactivity_age: entry.append('#: [[File:Exclamation mark 2.svg|14px]]Nominator inactive for ' + str(self.nominator_edit_age) + ' days') #print(self.title + " : Inactive nominator") if self.reviewer_edit_age > GA_config.inactivity_age and self.status != '2': entry.append('#: [[File:Exclamation mark 2.svg|14px]]Reviewer inactive for ' + str(self.reviewer_edit_age) + ' days') #print(self.title + " : Inactive reviewer") if len(self.warnings) > 0: entry.append("#: '''Warning:''' nomination is malformed -- " + "; ".join(self.warnings)) return("\n".join(entry)) def is_transcluded(self): # Check to see if the review is transcluded site = pywikibot.Site('en','wikipedia') talk_page = pywikibot.Page(site,'Talk:' + self.title) transclusion_string = "/GA" + str(self.page_num) + "}}" # Because of redirects (caused by page moves) it's safer to just test for the end of the string, which should still be unique. transclusion_find = talk_page.text.find(transclusion_string) if transclusion_find < 0: return False else: return True def transclude(self, conn): # Add the new review page to the article talk page and update the status site = pywikibot.Site('en','wikipedia') talk_page = pywikibot.Page(site,'Talk:' + self.title) testing_page = pywikibot.Page(site,"User talk:ChristieBot/Test") GAN_re = re.search('{{GA\s?nominee',talk_page.text) # Is there a nomination template on the talk page? update_status = True new_GAN_text = '' transclusion = '' # Log some basic information about the transclusion we're about to try to perform GAN.log(conn,"transclude",str(self.title),"nominator is " + str(self.nominator) + "; status is " + str(self.status) + "; page is " + str(self.page_num) + "; already transcluded? " + str(self.is_transcluded())) if GAN_re == None: # If we're here we couldn't find the GA nominee template. Somebody might have removed it manually. # This might mean that some of the close steps might already have been done. Check for the existence of a {{Failed GA}} template or # article history. Check if the review is already transcluded. #print("Called transclude; status " + self.status) # Also set update status to False -- whatever happens in this branch we're not going to be updating the nominee template, because it's not there update_status = False fga_re = re.search('{{Failed[ ]?GA[^\}]*page\s*\=\s*' + str(self.page_num) + '[^\}]*\}\}',talk_page.text) # Search for Failed GA template if fga_re == None: GAN.log(conn,"transclude",str(self.title), "GA nominee template not on talk page -- couldn't find Failed GA") # Can't find FailedGA, so look for articlehistory # Article history will have the form {{article history ... actionNlink=Talk:Title/GAX...}} where X is the page number # We don't care about the outcome, just that it's recorded ah_re = re.search('{{article[ ]?history[^\}]*action.link\s*=\s*talk:'+self.title.lower() + '/ga' + str(self.page_num), talk_page.text.lower()) if ah_re == None: GAN.log(conn,"transclude",str(self.title), "GA nominee template not on talk page -- couldn't find Article history") # If we're here we've given up; there's no failed GA and no article history so report an error # I suspect this is not an error if the status is P -- there doesn't have to be either if the GA template is there. # If so then an additional check here for the {{GA}} template would be useful. 
# The error is reported as "Could not find nominee template" because it's not there and we don't know why it's not there. GA_config.current_errors.append("\nCould not find nominee template in Talk:" + self.title + " when trying to update status") else: GAN.log(conn,"transclude",str(self.title), "Found article history") # We found article history with the GA in it so we can assume it was closed correctly else: GAN.log(conn,"transclude",str(self.title), "Found Failed GA") # We found a Failed GA template so we can assume it was closed correctly else: # Here we did find the nominee template so we need to update it GAN_start = GAN_re.span()[0] GAN_text = talk_page.text[GAN_start:] GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA nominee template text and nothing else x = talk_page.text[0:GAN_re.span()[0]] # x is the part of the talk page prior to the nominee template y = GAN_text z = talk_page.text[len(x) + len (y):] # z is the part of the talk page after the nominee template #status_match = re.search('\|[\s]*status[\s]*=[\s]*[^\|]*[\|\}]', GAN_text) # Original # The above regex doesn't work if the status is the last parameter. May need to search for that separately because the length will be different. status_match = re.search('\|[\s]*status[\s]*=[\s]*[^\|\}]*(\||\}\})', GAN_text) if status_match == None: GA_config.current_errors.append("\nCould not find status parameter in template in Talk:" + self.title + " when trying to update status") else: #print(self.title + ": status is " + self.status) if self.status == '': # If we're transcluding a review and the reviewer didn't update the status in the nominee template, we should set the status to onreview self.update_status('R') #print("before new_GAN_text: status_template_string = " + self.status_template_string) trailing_template_text = GAN_text[status_match.span()[1]-1:] # This works if the status parameter is not Last if GAN_text[status_match.span()[1] - 1] != "|": # if we didn't find a pipe we found the trailing braces trailing_template_text = "}}" #new_GAN_text = GAN_text[0:status_match.span()[0]] + "|status=" + self.status_template_string + GAN_text[status_match.span()[1]-1:] new_GAN_text = GAN_text[0:status_match.span()[0]] + "|status=" + self.status_template_string + trailing_template_text #print("new GAN text ends as " + new_GAN_text) talk_page.text = x + new_GAN_text + z # Status is either updated or we skipped it, and the talk page text is not saved if it was updated. # Now append the transclusion, but only if not already transcluded talk_page_is_editable = True GAN.log(conn, "transclude:protected page",self.title,"Before check for whether it's transcluded: talk_page_is_editable is " + str(talk_page_is_editable)) if not self.is_transcluded(): transclusion = "\n{{Talk:" + self.title + "/GA" + str(self.page_num) + "}}\n" try: GAN.log(conn, "transclude:protected page",self.title,"About to add transclusion to text") talk_page.text += transclusion except pywikibot.exceptions.UnsupportedPageError as e: GAN.log(conn, "transclude:protected page",self.title,"Can't transclude the review: " + str(e)) talk_page_is_editable = False if GA_config.is_live: if talk_page_is_editable: talk_page.save("Transcluding GA review") # Update the review page with a line showing the nominator. # First get the review page text review_page = pywikibot.Page(site,self.review_page_name) reviewer_line_re = re.search('\'\'\'Reviewer:\'\'\'',review_page.text) # Is the "'''Reviewer:'''" line still in the review page? 
if reviewer_line_re == None: # Here something has removed the reviewer info, so we don't know where to put the nominator info, and we bail OUT GAN.log(conn, "transclude","checking for reviewer line","No reviewer line found") else: # We found it so we can insert the nominator line. reviewer_line_start = reviewer_line_re.span()[0] review_page_first_part = review_page.text[:reviewer_line_start] review_page_last_part = review_page.text[reviewer_line_start:] review_page.text = review_page_first_part + "\'\'\'Nominator:\'\'\' {{User|" + self.nominator + "}} " + self.nomination_ts.strftime("%H:%M, %-d %B %Y (UTC)") + "\n\n" + review_page_last_part review_page.save("Adding nominator information") #GAN.log(conn,"transclude","inserting nominator line", "new page is " + review_page.text) else: testing_page.text += "==Transcluding GA review for " + self.title + "==" if self.is_transcluded(): testing_page.text += "\nNot transcluding review -- already transcluded" else: testing_page.text += "\nTransclusion text: </nowiki>" + transclusion + "</nowiki>" if new_GAN_text == '': testing_page.text += "\nNew status text: " + new_GAN_text + "" else: testing_page.text += "\nNew status text: " + new_GAN_text + "" testing_page.save("Transcluding GA review for " + self.title) if self.status == 'R': self.edit_summary = "On review [[" + self.title + "]] by " + self.reviewer elif self.status == 'H': self.edit_summary = "On review and on hold [[" + self.title + "]] by " + self.reviewer elif self.status == '2': self.edit_summary = "On review and second opinion [[" + self.title + "]] by " + self.reviewer return True def add_a_review(self, conn): # Add a record to the reviews table. Should only be called at the time the article is placed in "on review" status. # Check the article doesn't already have a review. If it does, only insert it if the reviewer is different. review_inserted = False sql = "select reviewer from " + GA_config.strings['GA reviews table name'] + " where article_title = '" + self.title.replace("'","''") + "' and page = " + str(self.page_num) cursor = conn.cursor(pymysql.cursors.DictCursor) try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("GANbot: add_a_review",sql,e) GAN.log(conn, "add_a_review","fetching reviewer",str(e)) return False if cursor.rowcount == 0: # Here we didn't find any reviews so we can just insert. insert_review = True else: # Iterate through the reviews we find. Notify an error for each one. If none match the reviewer we were passed, insert the review record. 
insert_review = True for row in cursor.fetchall(): GAN.notify_error("GANbot: add_a_review","counting reviews", "found prior review for " + str(self.title) + '/' + str(self.page_num) + " by " + str(row['reviewer']), False) if row['reviewer'] == self.reviewer: insert_review = False; if insert_review: rst_string = self.review_started_ts.strftime("%Y-%m-%d %H:%M:%S") sql = "insert into " + GA_config.strings['GA reviews table name'] + " (reviewer, article_title, page, review_ts) values ('" + self.reviewer.replace("'","''") + "','" + self.title.replace("'","''") + "'," + str(self.page_num) + ",'" + rst_string + "')" #sql = "insert into " + GA_config.strings['GA reviews table name'] + " (reviewer, article_title, page, review_ts) values ('" + self.reviewer.replace("'","''") + "','" + self.title.replace("'","''") + "'," + str(self.page_num) + ",'" + str(self.review_started_ts) + "')" #GAN.log(conn, "add_a_review","inserting review",sql) cursor = conn.cursor(pymysql.cursors.DictCursor) try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("add_a_review",sql,e) conn.rollback() return False conn.commit() review_inserted = True return review_inserted def save_nomination_state(self, conn, name_changes, wbgan, review_stats): # save the current state of the nomination to the nominations table cursor = conn.cursor() rst_string = 'null' if self.review_started_ts is not None: rst_string = self.review_started_ts.strftime("%Y-%m-%d %H:%M:%S") rst_string = "'" + rst_string + "'" ht_string = 'null' if self.hold_ts is not None: ht_string = self.hold_ts.strftime("%Y-%m-%d %H:%M:%S") ht_string = "'" + ht_string + "'" sot_string = 'null' if self.second_opinion_ts is not None: sot_string = self.second_opinion_ts.strftime("%Y-%m-%d %H:%M:%S") sot_string = "'" + sot_string + "'" if self.reviewer == None: self.reviewer = '' if self.note == None: self.note = '' if self.note == None: self.note = '' #print("title = <" + str(self.title) + ">") #print("nominator = <" + str(self.nominator) + ">") #print("nomination_ts = <" + str(self.nomination_ts) + ">") #print("status = <" + str(self.status) + ">") #print("reviewer = <" + str(self.reviewer) + ">") #print("note = <" + str(self.note) + ">") #print("review_started_ts = <" + str(self.review_started_ts) + ">") #print("hold_ts = <" + str(self.hold_ts) + ">") #print("second_opinion_ts = <" + str(self.second_opinion_ts) + ">") #print("shortdesc = <" + str(self.shortdesc) + ">") sql = "insert into " + GA_config.strings['nominations table name'] + " (title, page, nominator, nomination_ts, status, reviewer, subtopic, note, review_started_ts, hold_ts, second_opinion_ts, shortdesc, nominator_reviews, nominator_GAs) values " sql += "('" + self.title.replace("'","''") + "'," + str(self.page_num) + ",'" + self.nominator.replace("'","''") + "','" + str(self.nomination_ts) + "','" + self.status.replace("'","''") + "','" + self.reviewer.replace("'","''") + "','" + self.subtopic.replace("'","''") + "','" + self.note.replace("'","''") + "'," + rst_string + "," + ht_string + "," + sot_string + ",'" + self.shortdesc.replace("'","''") + "'," + str(review_stats.get_review_count(self.nominator, name_changes)) + "," + str(WBGAN.get_GA_count(wbgan, self.nominator, name_changes)) + ")" #print(sql) #GAN.log(conn, "save_nomination_state","inserting nomination",sql) try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("save",sql,e) conn.commit() return(cursor.rowcount) #def save_nomination_state(self, conn): # save the current state of the nomination to the nominations table 
# cursor = conn.cursor() # rst_string = 'null' # if self.review_started_ts is not None: # rst_string = self.review_started_ts.strftime("%Y-%m-%d %H:%M:%S") # rst_string = "'" + rst_string + "'" # ht_string = 'null' # if self.hold_ts is not None: # ht_string = self.hold_ts.strftime("%Y-%m-%d %H:%M:%S") # ht_string = "'" + ht_string + "'" # sot_string = 'null' # if self.second_opinion_ts is not None: # sot_string = self.second_opinion_ts.strftime("%Y-%m-%d %H:%M:%S") # sot_string = "'" + sot_string + "'" # if self.reviewer == None: # self.reviewer = '' # if self.note == None: # self.note = '' # if self.note == None: # self.note = '' # #print("title = <" + str(self.title) + ">") # #print("nominator = <" + str(self.nominator) + ">") # #print("nomination_ts = <" + str(self.nomination_ts) + ">") # #print("status = <" + str(self.status) + ">") # #print("reviewer = <" + str(self.reviewer) + ">") # #print("note = <" + str(self.note) + ">") # #print("review_started_ts = <" + str(self.review_started_ts) + ">") # #print("hold_ts = <" + str(self.hold_ts) + ">") # #print("second_opinion_ts = <" + str(self.second_opinion_ts) + ">") # #print("shortdesc = <" + str(self.shortdesc) + ">") # sql = "insert into " + GA_config.strings['nominations table name'] + " (title, page, nominator, nomination_ts, status, reviewer, subtopic, note, review_started_ts, hold_ts, second_opinion_ts, shortdesc) values " # sql += "('" + self.title.replace("'","''") + "'," + str(self.page_num) + ",'" + self.nominator.replace("'","''") + "','" + str(self.nomination_ts) + "','" + self.status.replace("'","''") + "','" + self.reviewer.replace("'","''") + "','" + self.subtopic.replace("'","''") + "','" + self.note.replace("'","''") + "'," + rst_string + "," + ht_string + "," + sot_string + ",'" + self.shortdesc.replace("'","''") + "')" # #print(sql) # try: # cursor.execute(sql) # except pymysql.Error as e: # GAN.notify_error("save",sql,e) # conn.commit() # return(cursor.rowcount) def update_status(self, new_status): # Status definitions are: # R -- on review. This requires that a review page exists. If it doesn't exist, the page is not on review. Note that a page may have been deleted. # H -- on hold. Same as on review but the GA nominee status is set to onhold. # 2 -- second opinion. Same as on review but the GA nominee status is set to 2ndopinion. # P -- the reviewer passed the GAN. There should be no GA nominee template. The bot detects a pass by finding a GA template instead of a GA nominee template # '' -- empty string status. This means the article is nominated but is not being reviewed. This is inconsistent with the existence of a review page. # X -- the nomination has been removed. This happens if the GA nominee status is deleted. It should also happen if the review page is deleted. 
self.status_link_url = 'https://en.wikipedia.org/w/index.php?title=Talk:' + urllib.parse.quote(self.title) + '%2FGA' + str(self.page_num) if new_status in ['R','review','onreview']: self.status = 'R' self.status_string = 'Under review' self.status_template_string = 'onreview' elif new_status in ['H','hold','onhold']: self.status = 'H' self.status_string = 'On hold' self.status_template_string = 'onhold' elif new_status in ['2','2nd opinion','2ndopinion']: self.status = '2' self.status_string = '2nd opinion' self.status_template_string = '2ndopinion' elif new_status in ['P','Passed']: self.status = 'P' self.status_string = 'Passed' self.status_template_string = '' elif new_status in ['F','Failed']: self.status = 'F' self.status_string = 'Failed' self.status_template_string = '' elif new_status in ['X','Removed']: self.status = 'X' self.status_string = 'Removed' self.status_template_string = '' elif new_status in ['', None]: self.status = '' self.status_string = 'Start review' self.status_template_string = '' self.status_link_url +='&action=edit&editintro=Template:GAN/editintro&preload=Template:GAN/preload' return None def new_status_message(self, new_status, old_status): # returns a string for the edit summary saying what changed # Needs the reviewer added for H and R #print("Called new_status_message for " + str(self.title) + " with args new_status = <" + str(new_status) + "> and old_status = <" + str(old_status) + ">; edit summary is initially <" + str(self.edit_summary) + ">") if old_status == new_status: # Should never happen self.edit_summary = "No change" elif old_status == None: if new_status == "": self.edit_summary = "New [[" + self.title + "]] (" + self.subtopic + ")" elif new_status == "H": self.edit_summary = "On hold [[" + self.title + "]] by " + self.reviewer elif new_status == "2": self.edit_summary = "Second opinion requested for [[" + self.title + "]] by " + self.reviewer elif new_status == "R": self.edit_summary = "On review [[" + self.title + "]] by " + self.reviewer elif new_status == "P": self.edit_summary = "Passed [[" + self.title + "]]" elif new_status == "F": self.edit_summary = "Failed [[" + self.title + "]]" elif new_status == "X": self.edit_summary = "Removed [[" + self.title + "]]" elif new_status == "P": self.edit_summary = "Passed [[" + self.title + "]]" elif new_status == "F": self.edit_summary = "Failed [[" + self.title + "]]" elif new_status == "X": self.edit_summary = "Removed [[" + self.title + "]]" elif new_status == "H": self.edit_summary = "On hold [[" + self.title + "]] by " + self.reviewer elif new_status == '2': self.edit_summary = "2nd opinion [[" + self.title + "]]" elif new_status == 'R': self.edit_summary = "On review [[" + self.title + "]] by " + self.reviewer elif new_status == '': if old_status == "H": self.edit_summary = "No longer on hold [[" + self.title + "]]" elif old_status == "2": self.edit_summary = "No longer on second opinion [[" + self.title + "]]" elif old_status == "R": self.edit_summary = "No longer on review [[" + self.title + "]]" else: self.edit_summary = "Status changed from <" + old_status + "> to <" + new_status + "> for [[" + self.title + "]]" #print("Called new_status_message for " + str(self.title) + " with args new_status = <" + str(new_status) + "> and old_status = <" + str(old_status) + ">; set edit summary to <" + str(self.edit_summary) + ">") def tell_nominator(self): # Leave the nominator a talk page message about the state of the nomination #print("In tell nominator for " + self.title) #if self.title == 'United 
Nations General Assembly Building': # return(None) if self.reviewer in GA_config.reviewers_who_notify_nominators: return(None) site = pywikibot.Site('en','wikipedia') title = self.title page = str(self.page_num) if self.status == 'X': return() result = "" if self.status == 'H': result = "|result=hold" elif self.status == 'P': result = "|result=pass" elif self.status == "F": result = "|result=fail" msg = "\n{{subst:GANotice|article=" + title + "|days=7" + result + "|reviewlink=Talk:" + title + "/GA" + page + "}} " msg += "<!-- Template:GANotice --> <small>Message delivered by [[User:ChristieBot|ChristieBot]], on behalf of [[User:" + self.reviewer + "|" + self.reviewer + "]]</small> -- [[User:" + self.reviewer + "|" + self.reviewer + "]] ([[User talk:" + self.reviewer + "|talk]]) ~~~~~\n" if GA_config.is_live: talk_page_name = "User talk:" + self.nominator else: talk_page_name = GA_config.strings['GA nominator talk page'] talk_page = pywikibot.Page(site, talk_page_name) talk_page.text += msg try: talk_page.save("Your [[WP:GA|GA]] nomination of [[" + self.title + "]]",minor=False) except: GAN.notify_error("tell_nominator","Saving talk page notification for " + self.title + "/" + self.status_string,e) return(None) def compare(self, other_nom): # returns a dictionary of diffs between this nomination and a dictionary of active_nomination values matches = {'title': True, 'page': True, 'nomination_ts': True, 'nominator': True, 'status': True, 'reviewer': True, 'subtopic': True, 'note': True, 'snapshot_ts': True, 'review_started_ts': True, 'hold_ts': True, 'second_opinion_ts': True} if self.title != other_nom['title']: matches['title'] = False if str(self.page_num) != str(other_nom['page']): matches['page'] = False if self.nomination_ts != other_nom['nomination_ts']: matches['nomination_ts'] = False if self.nominator != other_nom['nominator']: matches['nominator'] = False if self.status != other_nom['status']: matches['status'] = False if self.reviewer != other_nom['reviewer'] and not (self.reviewer == None and other_nom['reviewer'] == ''): matches['reviewer'] = False if self.subtopic != other_nom['subtopic']: matches['subtopic'] = False if self.note != other_nom['note']: matches['note'] = False if self.review_started_ts != other_nom['review_started_ts']: matches['review_started_ts'] = False if self.hold_ts != other_nom['hold_ts']: matches['hold_ts'] = False if self.second_opinion_ts != other_nom['second_opinion_ts']: matches['second_opinion_ts'] = False return matches def recalculate_custom_sort(self): # custom_sort_1 -- New nominators (zero promoted GAs) are first, followed by R_over_G in descending order # New nominators are sorted in descending order of number of reviews done, then by age # The rest are sorted within R_over_G by ascending number of GAs a = '' if self.nominator_GAs == 0: key1 = '0' key2 = str(99999 - self.nominator_reviews) key3 = str(99999 - self.age_in_days) key4 = self.nomination_ts.strftime("%Y%m%d%H%M%S") a = key1 + key2 + key3 + key4 else: key1 = '99' key2 = str(90000 - int(100.0 * float(self.R_over_G))) key3 = str(10000 + self.nominator_GAs) key4 = self.nomination_ts.strftime("%Y%m%d%H%M%S") a = key1 + key2 + key3 + key4 self.custom_sort_1 = a self.custom_sort_2 = self.nomination_ts.strftime("%Y%m%d%H%M%S") + self.title free_GAs = GA_config.free_GAs if int(self.nominator_GAs) <= free_GAs: #print("In free branch: nGAs = <" + str(self.nominator_GAs) + ">; free_GAs = " + str(free_GAs) + ">") key1 = str(100 + self.nominator_GAs)[1:] key2 = str(99999 - self.nominator_reviews) 
#key3 = str(99999 - self.age_in_days) key3 = self.nomination_ts.strftime("%Y%m%d%H%M%S") a = key1 + key2 + key3 else: #print("In non-free branch") key1 = '99' key2 = str(90000 - int(100.0 * float(self.R_plus_one_over_G))) key3 = str(10000 + self.nominator_GAs) #key4 = str(99999 - self.age_in_days) key4 = self.nomination_ts.strftime("%Y%m%d%H%M%S") key5 = self.title a = key1 + key2 + key3 + key4 + key5 self.custom_sort_1 = a #print("custom sort 1 for " + self.title + " is <" + self.custom_sort_1 + ">") a = '' if self.nominator_GAs == 0: key1 = '00' key2 = str(99999 - self.nominator_reviews) key3 = str(99999 - self.age_in_days) key4 = self.nomination_ts.strftime("%Y%m%d%H%M%S") a = key1 + key2 + key3 + key4 else: key1 = '99' key2 = str(90000 - int(100.0 * float(self.R_plus_one_over_G))) #key2 = str(90000 - int(100.0 * float(self.R_over_G))) key3 = str(10000 + self.nominator_GAs) key4 = self.nomination_ts.strftime("%Y%m%d%H%M%S") a = key1 + key2 + key3 + key4 self.custom_sort_3 = a #if self.title in ('Kamil Tolon','Tellico Dam','Renewable energy in Turkey','Cartesian tree'): # print(self.title) # print("reviews: " + str(self.nominator_reviews)) # print("nominator_GAs: " + str(self.nominator_GAs)) # print("age in days: " + str(self.age_in_days)) # print("key1: " + key1) # print("key2: " + key2) # print("key3: " + key3) # print("key4: " + key4) # print("cs3: " + self.custom_sort_3) # print("R_over_G: " + str(self.R_over_G)) # print("R_plus_one_over_G: " + str(self.R_plus_one_over_G)) def recalculate_RG(self): R = self.nominator_reviews G = self.nominator_GAs self.R_over_G = 0.0 if R > 0 and G == 0: self.R_over_G = 'Infinity' elif R > 0: self.R_over_G = "{:.2f}".format(float(R)/float(G)) self.R_minus_G = R - G def recalculate_R_plus_one_over_G(self): R = self.nominator_reviews G = self.nominator_GAs self.R_plus_one_over_G = 0.0 if R > 0 and G == 0: self.R_plus_one_over_G = 'Infinity' #elif R > 0: elif G > 0: self.R_plus_one_over_G = "{:.2f}".format((float(1.0) + float(R))/float(G)) self.R_minus_G = R - G class Nom_list: def __init__(self): # Constructor self.noms = [] self.noms_dict = {} def add(self, nom): # Add a nom to the dictionary for this list. Key is a tuple of title and page self.noms.append(nom) self.noms_dict[(nom.title, nom.page_num)] = nom def print_GAN_entries(self, target, sort_order, reverse_bool, review_stats, name_changes): # print a table, include the tabletop, each row, and the table bottom. self.noms = sorted(self.noms, key=operator.attrgetter(sort_order),reverse=False) section_text = [] for nom in self.noms: section_text.append(nom.print_GAN_entry(review_stats, name_changes)) return("\n".join(section_text)) class Review_stats: # There are three tables that hold reviewing information: # * GA_reviewing_baseline -- holds a record for each reviewer, showing how many reviews they had done at the time the table was created. This should never be updated again. # * GA_reviews -- holds one record for each review done since the baseline. A few records may precede the baseline but the code is written to ignore these. # * GA_reviewing -- the current reviewing statistics are in this table. It can be rebuilt by the update_statistics() method, which adds a count of the reviews in GA_reviews to the baseline. # * name_changes -- holds "old_name"/"new_name" pairs. 
Used to combine data from an old user name into the newer name @classmethod def update_statistics(cls, conn): # Delete the GA_reviewing table and recreate it from the baseline plus the table of reviews # Note this does *NOT* take into account the name changes. I think that would make it too complicated. It does a simple match to the review name; we can address name # changes when reporting to the stats page sql = "delete from " + GA_config.strings['GA reviewing statistics table name'] cursor = conn.cursor(pymysql.cursors.DictCursor) try: cursor.execute(sql) except pymysql.Error as e: conn.rollback() GAN.notify_error("update_statistics",sql,e) return False sql = "insert into " + GA_config.strings['GA reviewing statistics table name'] + " select a.reviewer, sum(a.num_reviews) as num_reviews, '" + datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + "' as snapshot_ts from (" sql += " select binary b.reviewer as reviewer, b.num_reviews from " + GA_config.strings['GA reviewing baseline table name'] + " b" sql += " union all" sql += " select r.reviewer as reviewer, count(*) as num_reviews from " + GA_config.strings['GA reviews table name'] + " r where r.review_ts > (select max(snapshot_ts) as base_ts from " + GA_config.strings['GA reviewing baseline table name'] + ") and r.superseded_ts is null group by binary r.reviewer" sql += ") a group by binary a.reviewer order by sum(a.num_reviews) desc" GAN.log(conn,"update_statistics", "N/A","sql is " + sql) cursor = conn.cursor(pymysql.cursors.DictCursor) try: cursor.execute(sql) except pymysql.Error as e: conn.rollback() GAN.notify_error("update_statistics",sql,e) return False conn.commit() # Now read the corrections page and reload the corrections table. site = pywikibot.Site('en','wikipedia') grsc_page_title = GA_config.strings['GA reviewing stats corrections page'] grsc_page = pywikibot.Page(site,grsc_page_title) corrections_text = grsc_page.text # Delete the existing records cursor = conn.cursor(pymysql.cursors.DictCursor) sql = "delete from " + GA_config.strings['GA reviewing statistics corrections table name'] try: cursor.execute(sql) except pymysql.Error as e: conn.rollback() GAN.notify_error("update_statistics: deleting old corrections",sql,e) return False GAN.log(conn,"update_statistics", None,"deleted old corrections") # Parse the text to find the table and split into rows corrections_re = re.search("!Page number\n!Old reviewer\n!New reviewer\n!Comment\n", corrections_text) if corrections_re is None: pass # There are no corrections to be made to the statistics else: remaining_text = corrections_text[corrections_re.span()[1]:] corrections_rows = remaining_text.split("\n|-") for crow in corrections_rows: crow = crow.replace('\n','').replace('|-','').replace('||','|').strip() crow = crow[1:] crow_values = crow.split('|') # Possibly check that the review cited does exist and the old reviewer name does match sql = "insert into " + GA_config.strings['GA reviewing statistics corrections table name'] + " (article_title, page_number, old_reviewer, new_reviewer, comment) values " sql += "('" + crow_values[0] + "', " + crow_values[1] + ", '" + crow_values[2] + "','" + crow_values[3] + "','" + crow_values[4] + "')" try: cursor.execute(sql) except pymysql.Error as e: conn.rollback() GAN.notify_error("update_statistics: inserting correction",sql,e) break GAN.log(conn,"update_statistics", None,"inserted a correction") conn.commit() # Assemble any errors and post them at the end of the corrections page? Need to delete and rewrite that section if so. 
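        # Illustrative sketch (assumed layout) of a corrections-page row that the parsing above expects:
        #   | Example article || 1 || OldReviewer || NewReviewer || review signed with a former account
        # After stripping the markup and splitting on '|' this becomes roughly
        #   ['Example article', '1', 'OldReviewer', 'NewReviewer', 'review signed with a former account']
        # and is inserted as one corrections record (article_title, page_number, old_reviewer, new_reviewer, comment).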
return Review_stats.write_statistics_page(conn) @classmethod def write_statistics_page(cls, conn): # write the Wikipedia page that holds the GA reviewing stats # Modify the query to include corrections #sql = "select reviewer, sum(num_reviews) as num_reviews_g from" #sql += " (select case when n.new_name is null then r.reviewer else n.new_name end as reviewer, r.num_reviews" #sql += " from " + GA_config.strings['GA reviewing statistics table name'] + " r" #sql += " left join " + GA_config.strings['name changes table name'] + " n on r.reviewer = n.old_name" #sql += " ) a group by a.reviewer order by num_reviews_g desc, a.reviewer" sql = "select reviewer, sum(num_reviews) as num_reviews_g from" sql += " (select case when n.new_name is null then r.reviewer else n.new_name end as reviewer, r.num_reviews" sql += " from " + GA_config.strings['GA reviewing statistics table name'] + " r" sql += " left join " + GA_config.strings['name changes table name'] + " n on r.reviewer = n.old_name" sql += " union all " sql += " select old_reviewer, 0-count(*) as num_reviews from " + GA_config.strings['GA reviewing statistics corrections table name'] sql += " union all " sql += " select new_reviewer, count(*) as num_reviews from " + GA_config.strings['GA reviewing statistics corrections table name'] sql += " ) a group by a.reviewer order by num_reviews_g desc, a.reviewer" GAN.log(conn,"write_statistics", None,"sql is " + sql) cursor = conn.cursor(pymysql.cursors.DictCursor) try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("write_statistics",sql,e) return False stats_page = ['<table class="wikitable">','<tr><th>User</th><th>Reviews</th></tr>'] for row in cursor.fetchall(): stats_page.append('<tr> <td> [[User:' + row['reviewer'] + '|' + row['reviewer'] + ']] </td> <td> ' + str(row['num_reviews_g']) + ' </td> </tr>') stats_page.append('</table>') stats_text = '\n'.join(stats_page) site = pywikibot.Site('en','wikipedia') grs_page_title = GA_config.strings['GA reviewing stats page'] grs_page = pywikibot.Page(site,grs_page_title) grs_page.text = stats_text grs_page.save("Updating GA reviewing statistics") return True def __init__(self, conn): # New constructor for getting data from the database # No attempt is made here to deal with the name changes. That has to be done by the code that looks up the review counts. self.reviewers = {} # Will contain a dictionary of reviewers giving the number of reviews they've done sql = "select reviewer, num_reviews from " + GA_config.strings['GA reviewing statistics table name'] cursor = conn.cursor(pymysql.cursors.DictCursor) try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("getting reviewing stats","sql",e) result = cursor.fetchall() for row in result: # dump the data into a local dictionary for use in the statistics self.reviewers[row['reviewer']] = row['num_reviews'] def get_review_count(self, user_name, name_changes): # returns the number of reviews for a given user. Adds the number in the reviewing statistics to the number found for any alternate names. 
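        # Illustrative example (assumed data): with self.reviewers = {'Old name': 4, 'New name': 10} and
        # name_changes = {'Old name': 'New name'}, get_review_count('Old name', name_changes) returns 14,
        # because reviews recorded under the account's newer name are added to the count for the old name.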
reviews = 0 if user_name in self.reviewers.keys(): reviews= int(self.reviewers[user_name]) if user_name in name_changes.keys(): if name_changes[user_name] in self.reviewers.keys(): reviews += int(self.reviewers[name_changes[user_name]]) elif user_name in name_changes.values(): old_names = [x for x in name_changes.keys() if name_changes[x] == user_name] for old_name in old_names: if old_name in self.reviewers.keys(): reviews += int(self.reviewers[old_name]) return reviews class WBGAN: # All this data is derived from the tables stored in the WP:WBGAN page database @classmethod def get_wbgan(cls, config, gan_conn): wbgan = {} with gan_conn.cursor() as cursor: sql = "select nominator, count(*) as GA_count from " + GA_config.strings['historical GA reviews table name'] + " where type = 'GAN' " sql += " and lower(outcome) in ('pass','passed','listed', 'promoted') group by nominator order by count(*) desc" try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("getting nominator data",sql,e) result = cursor.fetchall() for row in result: wbgan[row[0]] = row[1] return wbgan # # Below is the code to retrieve this data from SDZeroBot. That listing has the advantage of including very old GAs, but it does not include users with an # apostrophe, and doesn't include old successful nominations if they are delisted or now FAs # #database = "s54328__goodarticles_p" #conn = pymysql.connections.Connection(user=config['client']['user'], password=config['client']['password'], database="s54328__goodarticles_p", host='tools.db.svc.eqiad.wmflabs') #with conn.cursor() as cursor: # sql = "select nominator, count(*) as GA_count from nominators group by nominator order by count(*) desc" # try: # cursor.execute(sql) # except pymysql.Error as e: # GAN.notify_error("getting WBGAN data",sql,e) # result = cursor.fetchall() # for row in result: # wbgan[row[0]] = row[1] #return wbgan @classmethod def get_GA_count(cls, wbgan, user_name, name_changes): # returns the number of GAs for a given user. Adds the number in the reviewing statistics to the number found for any alternate names. 
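        # Illustrative note (assumed data): wbgan, built by get_wbgan() above, maps nominator names to counts
        # of successful nominations, e.g. {'Example nominator': 12}; get_GA_count then folds in any counts
        # recorded under the user's former names via the name_changes map, mirroring get_review_count above.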
        GAs = 0
        if user_name in wbgan.keys():
            GAs = int(wbgan[user_name])
        if user_name in name_changes.keys():
            if name_changes[user_name] in wbgan.keys():
                GAs += int(wbgan[name_changes[user_name]])
        if user_name in name_changes.values():
            old_names = [x for x in name_changes.keys() if name_changes[x] == user_name]
            for old_name in old_names:
                if old_name in wbgan.keys():
                    GAs += int(wbgan[old_name])
        return GAs

    @classmethod
    def get_one(cls, config, title):
        database = "s54328__goodarticles_p"
        conn = pymysql.connections.Connection(user=config['client']['user'], password=config['client']['password'], database="s54328__goodarticles_p", host='tools.db.svc.eqiad.wmflabs')
        sql = "select nominator, date as promotion_date from nominators where article = '" + title.replace("'","''") + "'"
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        records_found = 0
        try:
            records_found = cursor.execute(sql)
        except pymysql.Error as e:
            GAN.notify_error("WBGAN.get_one",sql,e)
            return None
        if records_found > 1:
            GAN.notify_error("WBGAN.get_one",sql,"Found more than one record")
            return None
        elif records_found == 0:
            return None
        else:
            result = cursor.fetchone()
            return result

    @classmethod
    def get_promotion_date(cls, config, title, promoted_near_date):
        wbgan_row = WBGAN.get_one(config, title)
        if wbgan_row is None:
            return None
        else:
            #print(title + " was promoted on " + str(promoted_near_date))
            wbgan_datetime = datetime.datetime.combine(wbgan_row['promotion_date'], datetime.datetime.min.time())
            #print("wbgan_datetime is " + str(wbgan_datetime) + "; promoted_near_date is " + str(promoted_near_date))
            # Accept the WBGAN date only if the promotion seen by the bot falls within three days of it.
            if promoted_near_date > wbgan_datetime - datetime.timedelta(3) and promoted_near_date < wbgan_datetime + datetime.timedelta(3):
                #print("inside range")
                return wbgan_datetime
            else:
                #print("outside range")
                return None

class Active_nomination:

    @classmethod
    def get_titles(self, conn):
        titles = []
        sql = "select title from " + GA_config.strings['active nominations table name'] # Gets all the nominations that were active at the last run
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            GAN.notify_error("getting active nominations",sql,e)
            return titles
        rows = list(cursor.fetchall())
        titles = [x['title'] for x in rows]
        return titles

    @classmethod
    def get_active_nomination(cls, conn, title):
        # Retrieve a row from the active nominations table
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        sql = "select n.title, n.page, n.nomination_ts, n.nominator, n.status, n.reviewer, n.subtopic, n.note, n.snapshot_ts, n.review_started_ts, n.hold_ts, n.second_opinion_ts, ifnull(n.shortdesc,'') as shortdesc from " + GA_config.strings['active nominations table name'] + " n "
        sql += " where n.title = '" + title.replace("'","''") + "'"
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            GAN.notify_error("get_active_nomination",sql,e)
        if cursor.rowcount > 1:
            GA_config.current_errors.append("More than one active nomination found for [[" + title + "]]\n")
            return None
        if cursor.rowcount == 0:
            return None
        row = cursor.fetchone()
        return row

    @classmethod
    def update_active_nominations(cls, conn):
        # The active_nominations table should reflect the current state of the GA nominations. This rebuilds it by summarizing the events in the nominations table.
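        # Illustrative walk-through (assumed rows): if the nominations table holds three snapshots of
        # ("Example article", page 1) with statuses '', 'R' and 'P', only the newest snapshot survives the
        # join below; because that snapshot's status is 'P' (passed) the nomination is closed and is not
        # copied into the active nominations table.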
sql = "delete from " + GA_config.strings['active nominations table name'] cursor = conn.cursor() try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("update_active_nominations","sql",e) sql = "insert into " + GA_config.strings['active nominations table name'] + " (title, page, nomination_ts, nominator, status, reviewer, subtopic, note, snapshot_ts, review_started_ts, hold_ts, second_opinion_ts, shortdesc) " sql += " select n.title, n.page, n.nomination_ts, n.nominator, n.status, n.reviewer, n.subtopic, n.note, n.snapshot_ts, n.review_started_ts, n.hold_ts, n.second_opinion_ts, n.shortdesc" sql += " from " + GA_config.strings['nominations table name'] + " n inner join (select title, page, max(snapshot_ts) as max_snapshot_ts from " + GA_config.strings['nominations table name'] + " group by title, page) nm" sql += " on n.title = nm.title and n.page = nm.page and n.snapshot_ts = nm.max_snapshot_ts" sql += " where n.status not in ('P','F','X')" #GAN.log(conn,"update_active_nominations","N/A",sql) try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("update_active_nominations",sql,e) conn.commit() return(cursor.rowcount) class GAN: @classmethod def get_link_redirect_target(cls, conn, link): GAN.log(conn,"get_link_redirect_target",link, "Called get_link_redirect_target") site = pywikibot.Site('en','wikipedia') page = pywikibot.Page(site,link) try: if page.exists(): try: page_text = page.get() return(None) # if not an error, then this is not a redirect except pywikibot.exceptions.IsRedirectPageError as e: redir_target = page.getRedirectTarget() return(redir_target) except pywikibot.exceptions.ServerError as e: GAN.notify_error("get_link_redirect_target","ServerError in getting page " + page.title(),e) return(None) else: return(None) except pywikibot.exceptions.Error as e: GAN.log(conn,"get_link_redirect_target:exceptions",link, "Pywikibot server exception " + str(e)) return(None) @classmethod def write_backlog_by_sort_order(cls, gan_conn, all_noms, sort_order): backlog_noms = sorted(all_noms, key=operator.attrgetter(sort_order), reverse=False) work_list = [] nominators_represented = [] # Each nominator can only have one in the backlog list nominators_receiving_reviews = [] # Tracks nominators who have a nomination under review for n in backlog_noms: if n.status != '': if n not in nominators_receiving_reviews: nominators_receiving_reviews.append(n.nominator) for n in backlog_noms: if n.nominator not in nominators_represented and n.nominator not in nominators_receiving_reviews: work_list.append(n) nominators_represented.append(n.nominator) if len(work_list) >= 10: break first_list = ['[[Wikipedia:Good article nominations#'+ x.subtopic + '|' + x.title + ']]' for x in work_list[:5]] second_list = ['[[Wikipedia:Good article nominations#'+ x.subtopic + '|' + x.title + ']]' for x in work_list[5:]] comment_start = "\n<!-- If you clear an item from backlog and want to update the list before the bot next runs, here are the next 5 oldest nominations:\n• " comment_end = "-->" site = pywikibot.Site('en','wikipedia') backlog_by_sort_order_page = pywikibot.Page(site,GA_config.strings['GAN backlog by sort order']) backlog_by_sort_order_page.text = '\n• '.join(first_list) + comment_start + '\n• '.join(second_list) + comment_end backlog_by_sort_order_page.save("Updating high priority backlog") @classmethod def parse_article_history_actions(cls, article_history_text, actions): ah_actions = [] for a in actions: one_action = {} a_date_str = 'null' a_link_str = 'null' a_result_str = 
'null' a_oldid_str = 'null' a_ = re.search(a + "\s*=\s*[^\|]*",article_history_text)[0] # a_ contains 'action1 = GAx\n' l = re.search(a + "\s*=\s*", a_) # l contains 'action1 = ' r = a_[l.span()[1]:] # r contains 'GAx\n' type = "" if r[2:3] in ['n','N','c','C']: type = "GAN" elif r[2:3] in ['r','R']: type = "GAR" elif r[2:3] in ['a','A']: type = "DGA" one_action['type'] = type a_date = re.search(a + "date\s*=\s*[^\|]*",article_history_text) if a_date is not None: a_date = a_date[0] l = re.search(a + "date\s*=\s*", a_date) a_raw_date_str = a_date[l.span()[1]:].strip() try: a_date_ts = parse(a_raw_date_str.replace("(UTC)","").replace("(UTC","").strip()) one_action['date'] = a_date_ts a_date_str = "'" + a_date_ts.strftime("%Y-%m-%d %H:%M:%S") + "'" except ValueError as e: continue #print("a_date_str is " + a_date_str) a_link = re.search(a + "link\s*=\s*[^\|]*",article_history_text) if a_link is not None: a_link = a_link[0] l = re.search(a + "link\s*=\s*", a_link) a_link_str = "'" + a_link[l.span()[1]:].strip().replace("'","''") + "'" one_action['link'] = a_link[l.span()[1]:].replace("_"," ").strip() #print("a_link_str is " + a_link_str) a_result = re.search(a + "result\s*=\s*[^\|]*",article_history_text) if a_result is not None: a_result = a_result[0] l = re.search(a + "result\s*=\s*", a_result) a_result_str = "'" + a_result[l.span()[1]:].strip().replace("'","''") + "'" one_action['result'] = a_result[l.span()[1]:].strip() #print("a_result_str is " + a_result_str) a_oldid = re.search(a + "oldid\s*=\s*[^\|]*",article_history_text) if a_oldid is not None: a_oldid = a_oldid[0] l = re.search(a + "oldid\s*=\s*", a_oldid) a_oldid_str = a_oldid[l.span()[1]:].strip() one_action['oldid'] = a_oldid_str if a_oldid_str == "": a_oldid_str = "null" ah_actions.append(one_action) return(ah_actions) @classmethod def get_article_history_actions(cls, article_history_text): matches = re.findall("[Aa]ction\d*\s*=\s*[gG][aA][NnRCcr]",article_history_text) if matches == None: return(None) actions = [] for m in matches: ms = re.search("[Aa]ction[\d]*",m).span() actions.append(m[ms[0]:ms[1]]) return(actions) @classmethod def get_article_history_template(cls, article_text): #print("Calling get_article_history_template") ah_re = re.search("{{[aA]rticle\s?[hH]istory",article_text) #print("matching string is " + str(ah_re)) if ah_re == None: return(None) ah_start = ah_re.span()[0] ah_text = article_text[ah_start:] # Strips everything before the template ah_text = (GAN.find_enclosed_string(ah_text))[0] # This means ah_text has the article history template text and nothing else ah_text = ah_text[2:-2] # Strip the braces ah_text = ah_text.strip() return(ah_text) @classmethod def is_redirect(cls, page): try: test = page.get() except pywikibot.exceptions.IsRedirectPageError as e: return(True) return(False) @classmethod def flush_audit(cls, conn): audit_days_to_keep = GA_config.audit_days_to_keep sql = "delete from " + GA_config.strings['audit table name'] + " where event_ts < now() - interval " + str(audit_days_to_keep) + " day;" cursor = conn.cursor() try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("flush_audit",sql,e) conn.commit() return(cursor.rowcount) @classmethod def log(cls, conn, source, title, message): # This method writes records to the audit table. Setting logging flags in GA_config allows debug messages to be limited to particular # spans of code. # If logging is disabled do nothing. If it's not explicitly disabled it's enabled. 
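        # Illustrative example (assumed config): GA_config.logging_flags = {'get_params': False} would
        # suppress audit rows for GAN.log(conn, 'get_params', ...) while leaving every other source enabled,
        # since any source not listed in logging_flags defaults to logging.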
message = message[:1000] log = True if source in GA_config.logging_flags.keys(): log = GA_config.logging_flags[source] if log == True: sql = "insert into " + GA_config.strings['audit table name'] + " (event_ts, source, title, message) values (now(6),'" + str(source) + "','" + str(title).replace("'","''") + "','" + str(message).replace("'","''") + "')" cursor = conn.cursor() # if we are on a tty output via print as well as writing to db try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("log",sql,e) conn.commit() if sys.stdout.isatty(): # We are running interactively so print the log message to stdout print(str(title) + ' / ' + str(message)) return(None) @classmethod def wiki2datetime(cls, wikistamp): time, date = wikistamp.split(', ') hour, minute = time.split(':') day, month, year, UTC = date.split(' ') month = GAN.monthConvert(month) dtVals = [int(year), int(month), int(day), int(hour), int(minute)] dt = datetime.datetime(*dtVals) return(dt) @classmethod def monthConvert(cls, name): ''' Takes in either the name of the month or the number of the month and returns the opposite. An input of str(July) would return int(7) while an input of int(6) would return str(June). Takes: int OR string Returns: string OR int ''' if type(name) is str: if name == "January": return 1 elif name == "February": return 2 elif name == "March": return 3 elif name == "April": return 4 elif name == "May": return 5 elif name == "June": return 6 elif name == "July": return 7 elif name == "August": return 8 elif name == "September": return 9 elif name == "October": return 10 elif name == "November": return 11 elif name == "December": return 12 else: raise ValueError elif type(name) is int: if name == 1:return('January') elif name == 2:return('February') elif name == 3:return('March') elif name == 4:return('April') elif name == 5:return('May') elif name == 6:return('June') elif name == 7:return('July') elif name == 8:return('August') elif name == 9:return('September') elif name == 10:return('October') elif name == 11: return('November') elif name == 12: return('December') else: raise ValueError @classmethod def check_params(cls, params, title): # Get the GA params for a nomination param_errors = [] #title = "Undefined article title" #if 'title' in params.keys(): # if params['title'] != None: # title = params['title'] if params['page'] == None: param_errors.append("invalid review page parameter") params['page'] = '0' # set to zero so that the rest of the code will have something to work with; this will be reported as an error if params['nominator'] == None: param_errors.append("invalid nominator parameter") params['nominator'] = 'Example' if params['status'] == None or params['status'] not in ['','2','H','R']: params['status'] = 'R' # set to onreview as the default and post an error param_errors.append("invalid status parameter") if len(param_errors) > 0: malformed_details = '; '.join(param_errors) GA_config.current_errors.append("\nMalformed nomination for [[" + title + "]]: " + malformed_details) #print('Found malformed nomination') return param_errors @classmethod def get_params(cls, conn, article): # Get the GA params for a nomination GAN.log(conn,"get_params",article.title(), "Called get_params") #print("Calling get_GA_params with article " + article.title()) try: article_text = article.text except pywikibot.exceptions.ServerError as e: GA_config.current_errors.append('\nGot a pywikibot server error when trying to read the text of ' + article.title()) 
GAN.log(conn,"get_params:exceptions",article.title(), "Got a pywikibot server error when trying to read the article text") return None title = article.title()[5:] GAN_re = re.search('{{GA[ ]?nominee',article.text) if GAN_re == None: return None GAN_start = GAN_re.span()[0] GAN_text = article.text[GAN_start:] # Strips everything before the template GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA nominee template text and nothing else # The steps are: find all the params that are named, sort them by their offsets, then loop through in offset order and set the param values. # This is necessary because the param values can include more template calls. #print(GAN_text) param_offsets = {} found_params = {} rt_match = re.search('\|[\s]*time[\s]*=[\s]*',GAN_text) if rt_match != None: param_offsets[rt_match.span()[0]] = 'rtime' found_params['rtime']=rt_match.span() nm_match = re.search('\|[\s]*nominator[\s]*=[\s]*',GAN_text) if nm_match != None: param_offsets[nm_match.span()[0]] = 'nominator' found_params['nominator']=nm_match.span() pg_match = re.search('\|[\s]*page[\s]*=[\s]*',GAN_text) if pg_match != None: param_offsets[pg_match.span()[0]] = 'page' found_params['page']=pg_match.span() su_match = re.search('\|[\s]*subtopic[\s]*=[\s]*',GAN_text) if su_match != None: param_offsets[su_match.span()[0]] = 'subtopic' found_params['subtopic']=su_match.span() st_match = re.search('\|[\s]*status[\s]*=[\s]*',GAN_text) if st_match != None: param_offsets[st_match.span()[0]] = 'status' found_params['status']=st_match.span() nt_match = re.search('\|[\s]*note[\s]*=[\s]*',GAN_text) if nt_match != None: param_offsets[nt_match.span()[0]] = 'note' found_params['note']=nt_match.span() sd_match = re.search('\|[\s]*shortdesc[\s]*=[\s]*',GAN_text) if sd_match != None: param_offsets[sd_match.span()[0]] = 'shortdesc' found_params['shortdesc']=sd_match.span() sorted_param_keys = sorted(param_offsets.keys()) #print(sorted_param_keys) #print(param_offsets) #print(found_params) if sorted_param_keys == []: return None up_to_first_named_param = GAN_text[:sorted_param_keys[0]] #print(up_to_first_named_param) first_bar = up_to_first_named_param.find('|') try: tm_text = up_to_first_named_param[first_bar+1:] except: GA_config.current_errors.append('\nCould not parse timestamp for ' + title) return None #print("TM <" + tm_text + ">") tm_match = re.search("\d\d:\d\d,.* \(UTC\)",tm_text) if tm_match == None: GA_config.current_errors.append('\nCould not parse timestamp for ' + title) return None else: #print(str(tm_match.span())) tm_text = tm_text[tm_match.span()[0]:tm_match.span()[1]] try: timestamp = GAN.wiki2datetime(tm_text) except: GA_config.current_errors.append('\nCould not parse timestamp for ' + title) return None #print(timestamp) named_param_strings = [] for a in range(len(sorted_param_keys)-1): named_param_strings.append(GAN_text[sorted_param_keys[a]:sorted_param_keys[a+1]]) named_param_strings.append(GAN_text[sorted_param_keys[len(sorted_param_keys)-1]:]) #print(named_param_strings) status = '' user_nom = None page = None note = '' subtopic = 'Miscellaneous' shortdesc = '' for i in range(len(sorted_param_keys)): param = param_offsets[sorted_param_keys[i]] param_i = named_param_strings[i] if param_i[-2:] == "}}": param_i = param_i[:-2] if param_i[-1:] == '|': param_i = param_i[:-1] if param_i[0] == '|': param_i = param_i[1:] if param == 'nominator': user_search = re.search('(User:|user:|User talk:|User Talk:|user Talk:|user talk:)[^\|\]]+',param_i) if user_search == None: continue else: 
user_span = user_search.span() user_text = param_i[user_span[0]:user_span[1]] user_nom_span = (re.search(':',user_text)).span() user_nom = user_text[user_nom_span[1]:] #print('U1 = '+ user_nom) site = pywikibot.Site('en','wikipedia') # Users sometimes sign with something other than their exact user name. If we follow the link to their user page and extract that page's title that resolves any differences. upage = pywikibot.Page(site, "User:" + user_nom) user_nom = upage.title()[5:] #print('U2 = '+ user_nom) elif param == 'rtime': continue elif param == 'subtopic': subtopic = param_i.replace('|','').replace('subtopic','').replace('=','').strip() #print('Su ='+subtopic) elif param == 'page': page_match = re.search('[0-9]+',param_i) if page_match == None: page = None else: page_n_span = page_match.span() page = param_i[page_n_span[0]:page_n_span[1]] #print('P =' + page) elif param == 'note': note = param_i.replace('note','').replace('=','').strip() #print('Nt ='+note) elif param == 'status': status_string = param_i.replace('|','').replace('status','').replace('=','').strip() #print('St string = <'+status_string+'>') if status_string.lower() in ['onreview','review','on review']: status = 'R' elif status_string.lower() in ['onhold','hold','on hold']: status = 'H' elif status_string.lower() in ['2ndopinion', '2nd opinion']: status = '2' elif status_string == '': status = '' else: status = None #print('Stat = '+ status) elif param == 'shortdesc': shortdesc = param_i.replace('shortdesc','').replace('=','').strip() #print('Sd ='+shortdesc) param_dict = {} param_dict['nominator']=user_nom param_dict['status']=status param_dict['page']=page param_dict['subtopic']=subtopic param_dict['timestamp']=timestamp param_dict['note']=note param_dict['shortdesc']=shortdesc return(param_dict) @classmethod def get_params_from_text_X(cls, title, rev_text): # Get the GA params for a nomination #TODO delete this -- I don't think anything uses it. #Marked with_X to see if it's used GAN_re = re.search('{{GA\s?[nN]ominee',rev_text) if GAN_re == None: #print("Can't find template") return None GAN_start = GAN_re.span()[0] GAN_text = rev_text[GAN_start:] # Strips everything before the template GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA nominee template text and nothing else # The steps are: find all the params that are named, sort them by their offsets, then loop through in offset order and set the param values. # This is necessary because the param values can include more template calls. 
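        # Illustrative example (assumed nomination) of why parameters are located by regex offsets rather
        # than split on '|': in
        #   {{GA nominee|20:00, 1 January 2023 (UTC)|nominator=[[User:Example|Example]]|page=1|note=see the {{tl|GAN}} talk thread|status=}}
        # the note value contains a nested template, so each named parameter's value is taken as the slice
        # between its own offset and the next named parameter's offset.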
#print(GAN_text) param_offsets = {} found_params = {} rt_match = re.search('\|[\s]*time[\s]*=[\s]*',GAN_text) if rt_match != None: param_offsets[rt_match.span()[0]] = 'rtime' found_params['rtime']=rt_match.span() nm_match = re.search('\|[\s]*nominator[\s]*=[\s]*',GAN_text) if nm_match != None: param_offsets[nm_match.span()[0]] = 'nominator' found_params['nominator']=nm_match.span() pg_match = re.search('\|[\s]*page[\s]*=[\s]*',GAN_text) if pg_match != None: param_offsets[pg_match.span()[0]] = 'page' found_params['page']=pg_match.span() su_match = re.search('\|[\s]*subtopic[\s]*=[\s]*',GAN_text) if su_match != None: param_offsets[su_match.span()[0]] = 'subtopic' found_params['subtopic']=su_match.span() st_match = re.search('\|[\s]*status[\s]*=[\s]*',GAN_text) if st_match != None: param_offsets[st_match.span()[0]] = 'status' found_params['status']=st_match.span() nt_match = re.search('\|[\s]*note[\s]*=[\s]*',GAN_text) if nt_match != None: param_offsets[nt_match.span()[0]] = 'note' found_params['note']=nt_match.span() sd_match = re.search('\|[\s]*shortdesc[\s]*=[\s]*',GAN_text) if sd_match != None: param_offsets[sd_match.span()[0]] = 'shortdesc' found_params['shortdesc']=sd_match.span() sorted_param_keys = sorted(param_offsets.keys()) #print(sorted_param_keys) #print(param_offsets) #print(found_params) if sorted_param_keys == []: return None up_to_first_named_param = GAN_text[:sorted_param_keys[0]] #print(up_to_first_named_param) first_bar = up_to_first_named_param.find('|') timestamp = None try: tm_text = up_to_first_named_param[first_bar+1:] except: GA_config.current_errors.append('\nCould not parse timestamp for ' + title) return None #print("TM <" + tm_text + ">") try: timestamp = parse(tm_text.replace("(UTC)","").strip()) except ValueError as e: pass # For this version we allow invalid timestamps to still return the other params named_param_strings = [] for a in range(len(sorted_param_keys)-1): named_param_strings.append(GAN_text[sorted_param_keys[a]:sorted_param_keys[a+1]]) named_param_strings.append(GAN_text[sorted_param_keys[len(sorted_param_keys)-1]:]) #print(named_param_strings) status = '' user_nom = None page = None note = '' subtopic = 'Miscellaneous' shortdesc = '' for i in range(len(sorted_param_keys)): param = param_offsets[sorted_param_keys[i]] param_i = named_param_strings[i] if param_i[-2:] == "}}": param_i = param_i[:-2] if param_i[-1:] == '|': param_i = param_i[:-1] if param_i[0] == '|': param_i = param_i[1:] if param == 'nominator': user_search = re.search('(User:|user:|User talk:|User Talk:|user Talk:|user talk:)[^\|\]]+',param_i) if user_search == None: continue else: user_span = user_search.span() user_text = param_i[user_span[0]:user_span[1]] user_nom_span = (re.search(':',user_text)).span() user_nom = user_text[user_nom_span[1]:] #print('U1 = '+ user_nom) site = pywikibot.Site('en','wikipedia') # Users sometimes sign with something other than their exact user name. If we follow the link to their user page and extract that page's title that resolves any differences. 
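        # Illustrative example (assumed signature): a nominator value of [[user talk:example|chat]] matches
        # the User/User talk prefix, the text after the first ':' gives 'example', and reading back the
        # title of pywikibot.Page(site, 'User:example') yields 'User:Example', so user_nom becomes the
        # canonical account name 'Example'.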
page = pywikibot.Page(site, "User:" + user_nom) user_nom = page.title()[5:] #print('U2 = '+ user_nom) elif param == 'rtime': continue elif param == 'subtopic': if param_i[0] == '|': param_i = param_i[1:] if param_i[-1] == '|': param_i = param_i[:-1] if param_i.find("|") > -1: param_i = param_i[:param_i.find("|")] subtopic = param_i.replace('|','').replace('subtopic','').replace('=','').strip() #print('Su ='+subtopic) elif param == 'page': page_match = re.search('[0-9]+',param_i) if page_match == None: page = None else: page_n_span = page_match.span() page = param_i[page_n_span[0]:page_n_span[1]] #print('P =' + page) elif param == 'note': note = param_i.replace('note','').replace('=','').strip() #print('Nt ='+note) elif param == 'status': status_string = param_i.replace('|','').replace('status','').replace('=','').strip() #print('St string = <'+status_string+'>') if status_string.lower() in ['onreview','review','on review']: status = 'R' elif status_string.lower() in ['onhold','hold','on hold']: status = 'H' elif status_string.lower() in ['2ndopinion', '2nd opinion']: status = '2' elif status_string == '': status = '' else: status = None #print('Stat = '+ status) elif param == 'shortdesc': shortdesc = param_i.replace('shortdesc','').replace('=','').strip() #print('Sd ='+shortdesc) param_dict = {} param_dict['nominator']=user_nom param_dict['status']=status param_dict['page']=page param_dict['subtopic']=subtopic param_dict['timestamp']=timestamp param_dict['note']=note param_dict['shortdesc']=shortdesc return(param_dict) @classmethod def get_failed_params(cls, article, offset): # Get the GA params for a failed GA #print("Calling get_failed_params with article " + article.title()) title = article.title() text = article.text[offset:] #print("in gfp, stripped text is " + text) GAN_re = re.search('{{Failed\s?GA',text) if GAN_re == None: return None GAN_start = GAN_re.span()[0] GAN_text = text[GAN_start:] # Strips everything before the template GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA nominee template text and nothing else # The steps are: find all the params that are named, sort them by their offsets, then loop through in offset order and set the param values. # This is necessary because the param values can include more template calls. 
#print("in gfp, template only text is "+GAN_text) param_offsets = {} found_params = {} tp_match = re.search('\|[\s]*topic[\s]*=[\s]*',GAN_text) if tp_match != None: param_offsets[tp_match.span()[0]] = 'topic' found_params['topic']=tp_match.span() st_match = re.search('\|[\s]*subtopic[\s]*=[\s]*',GAN_text) if st_match != None: param_offsets[st_match.span()[0]] = 'subtopic' found_params['subtopic']=st_match.span() pg_match = re.search('\|[\s]*page[\s]*=[\s]*',GAN_text) if pg_match != None: param_offsets[pg_match.span()[0]] = 'page' found_params['page']=pg_match.span() ol_match = re.search('\|[\s]*oldid[\s]*=[\s]*',GAN_text) if ol_match != None: param_offsets[ol_match.span()[0]] = 'oldid' found_params['oldid']=ol_match.span() sm_match = re.search('\|[\s]*small[\s]*=[\s]*',GAN_text) if sm_match != None: param_offsets[sm_match.span()[0]] = 'small' found_params['small']=sm_match.span() dt_match = re.search('\|[\s]*date[\s]*=[\s]*',GAN_text) if dt_match != None: param_offsets[dt_match.span()[0]] = 'date' found_params['small']=dt_match.span() sorted_param_keys = sorted(param_offsets.keys()) #print(sorted_param_keys) #print(param_offsets) #print(found_params) if sorted_param_keys == []: return None named_param_strings = [] for a in range(len(sorted_param_keys)-1): named_param_strings.append(GAN_text[sorted_param_keys[a]:sorted_param_keys[a+1]]) named_param_strings.append(GAN_text[sorted_param_keys[len(sorted_param_keys)-1]:]) #print(named_param_strings) page = None topic = 'Miscellaneous' subtopic = 'Miscellaneous' date = '' small = '' oldid = '' for i in range(len(sorted_param_keys)): param = param_offsets[sorted_param_keys[i]] param_i = named_param_strings[i] if param_i[-2:] == "}}": param_i = param_i[:-2] if param_i[-1:] == '|': param_i = param_i[:-1] if param_i[0] == '|': param_i = param_i[1:] if param == 'topic': topic = param_i.replace('|','').replace('topic','').replace('=','').strip() #print('Tp ='+topic) elif param == 'subtopic': subtopic = param_i.replace('|','').replace('subtopic','').replace('=','').strip() #print('Su ='+subtopic) elif param == 'page': page_match = re.search('[0-9]+',param_i) if page_match == None: page = None else: page_n_span = page_match.span() page = param_i[page_n_span[0]:page_n_span[1]] #print('P =' + str(page)) elif param == 'oldid': oldid = param_i.replace('|','').replace('oldid','').replace('=','').strip() #print('Ol ='+ str(oldid)) elif param == 'small': small = param_i.replace('|','').replace('small','').replace('=','').strip() #print('Su ='+small) elif param == 'date': date = param_i.replace('|','').replace('date','').replace('=','').strip() #print('Dt ='+date) param_dict = {} param_dict['topic']=topic param_dict['subtopic']=subtopic param_dict['oldid']=oldid param_dict['page']=page param_dict['small']=small param_dict['date']=date return(param_dict) @classmethod def get_delisted_params(cls, article): # Get the GA params for a delisted GA #print("Calling get_delisted_params with article " + article.title()) title = article.title() GAN_re = re.search('{{[dD]elisted\s?GA',article.text) if GAN_re == None: return None GAN_start = GAN_re.span()[0] GAN_text = article.text[GAN_start:] # Strips everything before the template GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA nominee template text and nothing else # The steps are: find all the params that are named, sort them by their offsets, then loop through in offset order and set the param values. # This is necessary because the param values can include more template calls. 
print("In get_delisted_params: " + GAN_text) param_offsets = {} found_params = {} tp_match = re.search('\|[\s]*topic[\s]*=[\s]*',GAN_text) if tp_match != None: param_offsets[tp_match.span()[0]] = 'topic' found_params['topic']=tp_match.span() st_match = re.search('\|[\s]*subtopic[\s]*=[\s]*',GAN_text) if st_match != None: param_offsets[st_match.span()[0]] = 'subtopic' found_params['subtopic']=st_match.span() pg_match = re.search('\|[\s]*page[\s]*=[\s]*',GAN_text) if pg_match != None: param_offsets[pg_match.span()[0]] = 'page' found_params['page']=pg_match.span() ol_match = re.search('\|[\s]*oldid[\s]*=[\s]*',GAN_text) if ol_match != None: param_offsets[ol_match.span()[0]] = 'oldid' found_params['oldid']=ol_match.span() sm_match = re.search('\|[\s]*small[\s]*=[\s]*',GAN_text) if sm_match != None: param_offsets[sm_match.span()[0]] = 'small' found_params['small']=sm_match.span() dt_match = re.search('\|[\s]*date[\s]*=[\s]*',GAN_text) if dt_match != None: param_offsets[dt_match.span()[0]] = 'date' found_params['date']=dt_match.span() sorted_param_keys = sorted(param_offsets.keys()) #print(sorted_param_keys) #print(param_offsets) #print(found_params) if sorted_param_keys == []: return None #up_to_first_named_param = GAN_text[:sorted_param_keys[0]] #print(up_to_first_named_param) #first_bar = up_to_first_named_param.find('|') #try: # tm_text = up_to_first_named_param[first_bar+1:] #except: # GA_config.current_errors.append('\nCould not parse timestamp for ' + title) # return None #print("TM <" + tm_text + ">") #tm_match = re.search("\d\d:\d\d,.* \(UTC\)",tm_text) #if tm_match == None: # GA_config.current_errors.append('\nCould not parse timestamp for ' + title) # return None #else: # #print(str(tm_match.span())) # tm_text = tm_text[tm_match.span()[0]:tm_match.span()[1]] #try: # timestamp = GAN.wiki2datetime(tm_text) #except: # GA_config.current_errors.append('\nCould not parse timestamp for ' + title) # return None #print(timestamp) named_param_strings = [] for a in range(len(sorted_param_keys)-1): named_param_strings.append(GAN_text[sorted_param_keys[a]:sorted_param_keys[a+1]]) named_param_strings.append(GAN_text[sorted_param_keys[len(sorted_param_keys)-1]:]) #print(named_param_strings) page = None topic = 'Miscellaneous' subtopic = 'Miscellaneous' small = '' oldid = '' date = '' for i in range(len(sorted_param_keys)): param = param_offsets[sorted_param_keys[i]] param_i = named_param_strings[i] if param_i[-2:] == "}}": param_i = param_i[:-2] if param_i[-1:] == '|': param_i = param_i[:-1] if param_i[0] == '|': param_i = param_i[1:] if param == 'topic': topic = param_i.replace('|','').replace('topic','').replace('=','').strip() #print('Tp ='+topic) elif param == 'subtopic': subtopic = param_i.replace('|','').replace('subtopic','').replace('=','').strip() #print('Su ='+subtopic) elif param == 'page': page_match = re.search('[0-9]+',param_i) if page_match == None: page = None else: page_n_span = page_match.span() page = param_i[page_n_span[0]:page_n_span[1]] #print('P =' + str(page)) elif param == 'oldid': oldid = param_i.replace('|','').replace('oldid','').replace('=','').strip() #print('Ol ='+ str(oldid)) elif param == 'small': small = param_i.replace('|','').replace('small','').replace('=','').strip() #print('Sm ='+small) elif param == 'date': date = param_i.replace('|','').replace('date','').replace('=','').strip() #print('Dt ='+date) param_dict = {} param_dict['topic']=topic param_dict['subtopic']=subtopic param_dict['oldid']=oldid param_dict['page']=page param_dict['small']=small 
param_dict['date']=date return(param_dict) @classmethod def move_miscellaneous_to_end(cls,topic_list): new_list = [] misc_found = False for i in topic_list: if i != 'Miscellaneous': new_list.append(i) else: misc_found = True if misc_found: new_list.append('Miscellaneous') return(new_list) @classmethod def GAN_page_top_text(cls, target): top_text = ["{{Short description|Wikipedia page for submissions and discussions on Good article distinction candidates}}"] top_text.append("<noinclude>{{pp-semi-indef}}{{pp-move-indef}}</noinclude>") top_text.append("<!-- Putting categories and inter language links here to avoid confusion from people adding to the bottom of last articles section. -->") top_text.append("[[Category:WikiProject Good articles|Good article nominations]]") top_text.append("[[Category:Non-talk pages that are automatically signed]]") top_text.append("<!-- [[Category:Articles needing attention]] -->") top_text.append("{{User:Dispenser/Checklinks/config|interval=fortnightly|generator=all|convert=None|namespaces=0}}") top_text.append("<!-- End of categories and interwikis-->") top_text.append("{{/guidelines}}") top_text.append("") top_text.append("={{anchor|NomsByTopic}}Nominations=") top_text.append("[[File:GA candidate.svg|40px|left|Good article nominations]]") top_text.append("<div style=\"font-size: 20px; text-align: center;\">Alternative lists of articles awaiting review</div>") top_text.append("<div style=\"font-size: 16px; text-align: center;\">[[User:SDZeroBot/GAN sorting|Sortable list annotated with more detailed topic information]]</div>") top_text.append("<div style=\"font-size: 16px; text-align: center;\">[[User:ChristieBot/SortableGANoms|List allowing sorting by nominator, review count, and other fields]]</div>") top_text.append("") top_text.append("To add good article nominations to this page, please see the [[Wikipedia:Good article nominations/Instructions|instructions]].") top_text.append("") top_text.append("Note: For guidance in locating and [[Wikipedia:Citing sources|citing sources]] for articles, please see [[Wikipedia:Identifying reliable sources]]. For guidance in locating and citing sources for medicine and psychology-related articles, see [[Wikipedia:Identifying reliable sources (medicine)]]. 
For guidance in locating and citing sources for science and math-related articles, please see [[Wikipedia:Scientific citation guidelines]].") top_text.append("__NOTOC__") top_text.append("<!-- NOMINATION CATEGORIES BEGIN HERE -->") top_text.append("<!-- EVERYTHING BELOW THIS COMMENT IS UPDATED AUTOMATICALLY BY A BOT -->") top_text.append("") return("\n".join(top_text)) @classmethod def GAN_page_bottom_text(cls): bottom_text= ["<!-- EVERYTHING ABOVE THIS COMMENT IS UPDATED AUTOMATICALLY BY A BOT -->"] bottom_text.append("{{-}}<!-- For proper page length when Misc is empty -->") bottom_text.append("{{Wikipedia:Good article nominations/Topic lists}}") bottom_text.append("{{Wikipedia community}}") bottom_text.append("<!-- NOMINATION CATEGORIES END HERE -->") return("\n".join(bottom_text)) @classmethod def notify_error(cls, location, command, error, fatal = False): GA_config.errors_found = True site = pywikibot.Site('en','wikipedia') page = pywikibot.Page(site, GA_config.strings['GAN bugs page']) page.text += "\n==Error notification==\n* Location: " + location + "\n* Command: " + command + "\n* Error: " + str(error) + "\n-- ~~~~\n" page.save("Reporting an error in " + location) if fatal: exit() @classmethod def clear_errors(cls): site = pywikibot.Site('en','wikipedia') page = pywikibot.Page(site, GA_config.strings['GAN errors page']) page.text = "" page.save("Clearing errors at start of run") @classmethod #def find_enclosed_string(cls, string): # Used to search for matching right brace pairs when getting params. # print("String passed to find_enclosed_string is <" + string + ">") # left_brace_cnt = 0 # enclosed_list = [] # enclosed_str_range = [0, 0] # for i, s in enumerate(string): # print(string[i:i+1]) # if s == "{": # if left_brace_cnt == 0: # enclosed_str_range[0] = i # left_brace_cnt += 1 # elif s == "}": # left_brace_cnt -= 1 # if left_brace_cnt == 0: # enclosed_str_range[1] = i # if enclosed_str_range[1] > enclosed_str_range[0]: # enclosed_list.append(string[enclosed_str_range[0]:enclosed_str_range[1]+1]) # enclosed_str_range = [0, 0] # return enclosed_list def find_enclosed_string(cls, string): # Used to search for matching right brace pairs when getting params. 
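    # A minimal sketch (not executed) of what find_enclosed_string() returns: given
    #   '{{GA nominee|12:00, 1 January 2023 (UTC)|nominator=[[User:Example|Example]]|page=1|status=}} trailing text'
    # it returns a one-element list holding the complete '{{GA nominee|...}}' template, nested {{...}} calls
    # included, because a match only closes when the count of '{{' / '}}' pairs returns to zero.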
#print("String passed to find_enclosed_string is <" + string + ">") left_brace_pair_cnt = 0 enclosed_list = [] enclosed_str_range = [0, 0] for i, s in enumerate(string): s2 = (string[i:i+2]) if s2 == "{{": if left_brace_pair_cnt == 0: enclosed_str_range[0] = i left_brace_pair_cnt += 1 elif s2 == "}}": left_brace_pair_cnt -= 1 if left_brace_pair_cnt == 0: enclosed_str_range[1] = i if enclosed_str_range[1] > enclosed_str_range[0]: enclosed_list.append(string[enclosed_str_range[0]:enclosed_str_range[1]+2]) enclosed_str_range = [0, 0] return enclosed_list @classmethod def write_errors_page(cls,error_page_text): #print("write errors called with :" + error_page_text) site = pywikibot.Site('en','wikipedia') page = pywikibot.Page(site, GA_config.strings['GAN errors page']) error_summary = "All errors cleared" if error_page_text == '' and GA_config.errors_found == False: page.text = "" else: page.text += error_page_text error_summary = "Updating list of GAN errors" page.save(error_summary) return None @classmethod def mark_superseded_reviews(cls, conn): cursor = conn.cursor(pymysql.cursors.DictCursor) cursor2 = conn.cursor(pymysql.cursors.DictCursor) cursor3 = conn.cursor(pymysql.cursors.DictCursor) sql = "select article_title, page from " + GA_config.strings['GA reviews table name'] + " where superseded_ts is null group by article_title, page having count(*) > 1" #print(sql) try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("mark_superseded_reviews",sql,e) if cursor.rowcount != 0: # We found at least one review with a blank reviewer GAN.log(conn,"mark_superseded_reviews","N/A", "Found supersession candidates") for row in cursor.fetchall(): GAN.log(conn,"mark_superseded_reviews","N/A", "Supersession candidates: " + str(row['article_title']) + " / " + str(row['page'])) sql2 = "select reviewer, review_ts from " + GA_config.strings['GA reviews table name'] + " where article_title = '" + row['article_title'].replace("'","''") + "' and page = " + str(row['page']) + " order by review_ts desc" try: cursor2.execute(sql2) except pymysql.Error as e: GAN.notify_error("mark_superseded_reviews",sql2,e) if cursor2.rowcount == 0: GAN.notify_error("mark_superseded_reviews","Marking superseded reviews: can't find " + row['article_title'].replace("'","''") + " / " + str(row['page']),e) else: skipped_first = False for row2 in cursor2.fetchall(): #print("Found reviewer " + row2['reviewer'] + " at time " + row2['review_ts'].strftime("%Y%m%d%H%M%S") + " for " + row['article_title'] + " / " + str(row['page'])) if skipped_first == False: skipped_first = True else: # The ones after the first one are the ones that should be marked superseded. 
sql3 = "update " + GA_config.strings['GA reviews table name'] + " set superseded_ts = '" + datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + "' where article_title = '" sql3 += row['article_title'].replace("'","''") + "' and page = " + str(row['page']) + " and reviewer = '" + row2['reviewer'].replace("'","''") + "' and review_ts = '" + str(row2['review_ts']) + "'" GAN.log(conn,"mark_superseded_reviews", row['article_title'],"marking review " + str(row['page']) + "as superseded") try: pass cursor.execute(sql3) except pymysql.Error as e: conn.rollback() GAN.notify_error("mark_superseded_reviews",sql3,e) else: GAN.log(conn,"mark_superseded_reviews", "N/A","nothing to mark as superseded") conn.commit() @classmethod def integrity_checks(cls, conn): cursor = conn.cursor(pymysql.cursors.DictCursor) # Check for a nomination record without a P/F/X to close it that does not appear in active nominations. sql = "select n.title, n.page from" sql += " (select distinct n1.title, n1.page from " + GA_config.strings['nominations table name'] + " n1" sql += " left join " + GA_config.strings['active nominations table name'] + " a" sql += " on n1.title = a.title" sql += " and n1.page = a.page" sql += " where a.title is null) n" sql += " left join (select distinct n2.title, n2.page from " + GA_config.strings['nominations table name'] + " n2 where status in ('P','F','X')) n2" sql += " on n.title = n2.title" sql += " and n.page = n2.page" sql += " where n2.title is null" try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("integrity_checks",sql,e) if cursor.rowcount != 0: # We found at least one record that has a nomination record not represented in active_nominations for row in cursor.fetchall(): GAN.notify_error("integrity check","Found unclosed inactive nomination " + row['title'] + '/' + str(row['page']),e) # Check for more than one active nomination record for any article. 
sql = "select title from " + GA_config.strings['active nominations table name'] + " group by title having count(*) > 1" try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("integrity_checks",sql,e) if cursor.rowcount != 0: # We found at least one article that has multiple active nomination records for row in cursor.fetchall(): GAN.notify_error("integrity check","Found multiple active nominations for " + row['title'],e) # Check for any reviewers with a blank reviewer name in nominations table sql = "select title, page from " + GA_config.strings['nominations table name'] + " where reviewer = '' and status in ('H','2','R','P','F');" try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("integrity_checks",sql,e) if cursor.rowcount != 0: # We found at least one review with a blank reviewer for row in cursor.fetchall(): GAN.notify_error("integrity check","Found blank reviewer in " + GA_config.strings['nominations table name'] + " table for " + str(row['title']) + " / " + str(row['page']),e) # Check for any reviewers with a blank reviewer name in GA_reviews table sql = "select article_title, page from " + GA_config.strings['GA reviews table name'] + " where reviewer = '';" try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("integrity_checks",sql,e) if cursor.rowcount != 0: # We found at least one review with a blank reviewer for row in cursor.fetchall(): GAN.notify_error("integrity check","Found blank reviewer in " + GA_config.strings['GA reviews table name'] + " for " + str(row['title']) + " / " + str(row['page']),e) # Check for duplicate GA_review records -- same reviewer recorded more than once for a given review sql = "select article_title, page, reviewer from " + GA_config.strings['GA reviews table name'] + " group by article_title, page, reviewer having count(*) > 1" try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("integrity_checks",sql,e) if cursor.rowcount != 0: # We found at least one article that has a duplicate reviewer for row in cursor.fetchall(): GAN.notify_error("integrity check","Found duplicate reviewer for " + str(row['article_title']) + " / " + str(row['page']),e) return None class Name_changes: @classmethod def get_name_changes(cls, conn): # Build a dictionary from the name_changes table name_changes = {} cursor = conn.cursor(pymysql.cursors.DictCursor) sql = "select n.old_name, n.new_name from " + GA_config.strings['name changes table name'] + " n " try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("initialize name changes dictionary",sql,e) for row in cursor.fetchall(): name_changes[row['old_name']] = row['new_name'] return name_changes class GAN_history: @classmethod def analyse_a_page(cls, conn, site, GA_page_title): review_page = pywikibot.Page(site, GA_page_title) if not review_page.exists(): print("Review page " + GA_page_title + " no longer exists") pass elif review_page.title()[-1:] not in ['1','2','3','4','5','6','7','8','9','0']: GAN.log(conn,"analyse_a_page",review_page.title(),"Not a GA subpage") else: review_page_num = int(review_page.title()[-1:]) try: review_page_text = review_page.get() # Here we know it's not a redirect page because the get didn't raise an error reviewer = review_page.oldest_revision['user'] review_ts = review_page.oldest_revision['timestamp'] article_title = review_page.title()[5:-4] #GAN.log(conn,"analyse_a_page",article_title, "reviewer is " + reviewer + "; review was started " + str(review_ts)) if GAN_history.is_already_inserted(conn, site, reviewer, 
article_title, review_page_num, review_ts): GAN.log(conn,"analyse_a_page",review_page.title(),"Skipping review for page " + str(review_page_num) + " -- already inserted") else: GAN_history.insert_historical_GA_review(conn, site, reviewer, article_title, review_page_num, review_ts) except pywikibot.exceptions.IsRedirectPageError as e: review_page_redir_target = review_page.getRedirectTarget() if review_page_redir_target.exists(): reviewer = review_page_redir_target.oldest_revision['user'] review_ts = review_page_redir_target.oldest_revision['timestamp'] article_title = review_page_redir_target.title()[5:-4] #GAN.log(conn,"analyse_a_page",review_page.title(), "Reviewer is " + reviewer + "; review was started " + str(review_ts)) if GAN_history.is_already_inserted(conn, site, reviewer, article_title, review_page_num, review_ts): GAN.log(conn,"analyse_a_page",review_page.title(),"Skipping review for page " + str(review_page_num) + " -- already inserted") else: GAN_history.insert_historical_GA_review(conn, site, reviewer, article_title, review_page_num, review_ts) else: GAN.log(conn,"analyse_a_page",review_page.title(), "Redirect target page " + review_page_redir_target.title() + " does not exist") @classmethod def is_already_inserted(cls, conn, site, reviewer, article_title, page_num, review_ts): sql = "select * from " + GA_config.strings['historical GA reviews table name'] + " where reviewer = '" + reviewer.replace("'","''") + "' and article_title = '" + article_title.replace("'","''") + "' and page = " + str(page_num) + " and review_ts = '" + review_ts.strftime("%Y-%m-%d %H:%M:%S") + "'" #GAN.log(conn,"is_already_inserted",article_title, "sql is " + sql) cursor = conn.cursor(pymysql.cursors.DictCursor) try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("is_already_inserted",sql,e) return(True) if cursor.rowcount != 0: return(True) else: return(False) @classmethod def insert_historical_GA_review(cls, conn, site, reviewer, article_title, page_num, review_ts): GAN.log(conn,"scan_for_new_pages", article_title,"inserting into historical reviews table") sql = "insert into " + GA_config.strings['historical GA reviews table name'] + " (reviewer, article_title, page, review_ts) values ('" + reviewer.replace("'","''") + "','" + article_title.replace("'","''") + "'," + str(page_num) + ",'" + str(review_ts) + "')" #GAN.log(conn,"insert_historical_GA_review",article_title, "sql is " + sql) cursor = conn.cursor(pymysql.cursors.DictCursor) try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("insert_historical_GA_review",sql,e) conn.rollback() conn.commit() return(None) @classmethod def record_historical_reviewer_run(cls, conn, reviewer, method): GAN.log(conn,"scan_for_new_pages", article_title,"inserting into historical reviewer runs table") sql = "insert into " + GA_config.strings['historical reviewer_runs table name'] + " (reviewer, method, run_ts) values ('" + reviewer.replace("'","''") + "','" + method.replace("'","''") + "','" + str(datetime.datetime.utcnow()) + "')" #GAN.log(conn,"record_historical_reviewer_run","N/A", "sql is " + sql) cursor = conn.cursor(pymysql.cursors.DictCursor) try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("record_historical_reviewer_run",sql,e) conn.rollback() conn.commit() return(None) @classmethod def run_exists(cls, conn, reviewer, method): sql = "select * from " + GA_config.strings['historical reviewer_runs table name'] + " where reviewer = '" + reviewer.replace("'","''") + "' and method = '" + 
method.replace("'","''") + "'" GAN.log(conn,"run_exists","N/A", "sql is " + sql) cursor = conn.cursor(pymysql.cursors.DictCursor) try: cursor.execute(sql) except pymysql.Error as e: GAN.notify_error("run_exists",sql,e) return(True) if cursor.rowcount != 0: return(True) else: return(False)