User:ChristieBot/GANbot.py
Appearance
'''
Copyright (c) 2022 Mike Christie

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''

# Standard library modules
import configparser
import datetime
import operator
import os
import re
import sys

# Third party modules
import pymysql
import pywikibot
pywikibot.config.max_retries=2

# Local modules
sys.path.append('./www/python/src') # Not needed if I run from that directory
from GA import Topic, Subtopic, Nom, Review_stats, WBGAN, Active_nomination, GAN, Name_changes, Nom_list
import GA_config
from GA_history import GAH, FailedGA, GAnominee, Article_History, GA_article_page, GARlink, GA_talk_page, GA_sub_page, GA_history_Exception, GAO

# Config
# The database credentials are read from replica.my.cnf in the home directory.
# If $HOME is not set, fall back to the platform's notion of the home directory
# instead of failing on None + str.
HOME = os.environ.get('HOME') or os.path.expanduser('~')
replica_path = HOME + '/replica.my.cnf'
if os.path.exists(replica_path): # check that the file is found
    config = configparser.ConfigParser()
    config.read(replica_path)
else:
    # Nothing below can work without the credentials (config is used for the
    # database connection and for WBGAN), so stop here with a clear message
    # rather than failing later with a NameError on config.
    #GAN.log(gan_conn,"GANbot","Looking for replica.my.cnf","File not found")
    sys.exit('replica.my.cnf file not found')

# Set up the connection to the GAN database
database = "s55175__ganfilter"
gan_conn = pymysql.connections.Connection(
    user=config['client']['user'],
    password=config['client']['password'],
    database=database,
    host='tools.db.svc.eqiad.wmflabs',
)

# Create an initial copy of the review stats. This will be used to provide the values to store in the nomination audit trail so that we know how many GAs and reviews were
# showing on the GAN page when the review was started.
review_stats = Review_stats(gan_conn) # Refresh the GA reviewing stats dictionary from the database

# Create the name changes lookup from the database
users = {}
name_changes = Name_changes.get_name_changes(gan_conn)

#t = datetime.datetime.utcnow()
GAN.log(gan_conn, "GANbot","Initializing","Starting run")

# Initialize some variables
site = pywikibot.Site('en','wikipedia')
wbgan = WBGAN.get_wbgan(config, gan_conn) # The WBGAN database that holds the number of promoted GAs by each nominator. Maintained by another tool.
# Load the topic and subtopic definitions.
[topics, subtopics] = Topic.initialize()

# Clear the error page
GAN.clear_errors()

# If is_live = False, then single_title can be used to restrict a run to a single article to speed up testing of changes
single_title = GA_config.single_title

# Next step is get the list of articles we have to look at in this pass. This is the union of two lists:
# 1. All the articles that currently have a {{GA nominee}} template on their talk page; and
# 2. All articles currently in the active_nominations table.

# List 1: the members of the nominee category. We're going to iterate over these; this is the contents of GAN.
cat = pywikibot.Category(site, 'Good article nominees')
cat_articles = []
titles_seen = set()
for member in cat.articles():
    # Drop the first five characters of the page title (the length of a "Talk:" prefix).
    title = member.title()[5:]
    if title in titles_seen:
        continue
    titles_seen.add(title)
    cat_articles.append(title)

# List 2: what was in GAN the last time the bot ran.
GAN.log(gan_conn, "GANbot", "Process active nominations table", "Starting")
last_run_articles = Active_nomination.get_titles(gan_conn)
GAN.log(gan_conn, "GANbot", "Process active nominations table", "Obtained active nominations")

# All articles in the list last time or this time or both, with duplicates removed
all_articles = list(set(cat_articles + last_run_articles))
GAN.log(gan_conn, "GANbot", "Process active nominations table", "Created list of articles")

# Edit-summary log and "does the page need saving" flag for the main GAN page,
# plus the same pair for each topic page.
update_log = ['']
update_needed = False
topic_update_needed = {topic_key: False for topic_key in topics.keys()}
topic_PFX_log = {topic_key: [''] for topic_key in topics.keys()}

noms = Nom_list() # This will be loaded with the nominations that will print on GAN.
# ---------------------------------------------------------------------------
# Main pass over every article that is nominated now and/or was nominated at
# the last run. For each article the code below works out what changed and
# sets a group of flags (transclusion_needed, new_review, new_nomination_state,
# promotion_needed, tell_nominator_needed); the flags are acted on at the
# bottom of the loop body. After the loop the GAN page text and the per-topic
# page texts are assembled, and the topic pages are saved.
#
# NOTE(review): the indentation of this section was reconstructed from a copy
# of the script in which line breaks had been lost. The few places where the
# nesting could not be determined with certainty are marked NOTE(review).
# ---------------------------------------------------------------------------
ctr = 1
# No article is being processed yet, so there is no title to attach to this log entry.
GAN.log(gan_conn, "GANbot:all_articles loop", None, "About to start loop")
for title in all_articles:
    GAN.log(gan_conn, "GANbot:all_articles loop", title, "Starting")
    ctr += 1
    if ctr % 10 == 0:
        GAN.log(gan_conn, "GANbot:all_articles loop", None, "Processed " + str(ctr) + " articles")
    talk_page = pywikibot.Page(site, 'Talk:' + title)
    try:
        # Check it's still nominated -- necessary because the loop can take a couple of minutes
        in_cat = 'Category:Good article nominees' in [x.title() for x in talk_page.categories()]
    except pywikibot.exceptions.ServerError:
        GAN.log(gan_conn, "GANbot:exceptions", title, "Server error testing if talk page is in category")
        # The live check failed, so fall back to the category listing taken at the
        # start of the run instead of leaving in_cat undefined (first article) or
        # carrying over the previous article's value.
        in_cat = title in cat_articles
    in_last = title in last_run_articles
    matches = None
    old_nom = None
    new_review = False
    transclusion_needed = False
    new_nomination_state = False
    promotion_needed = False
    tell_nominator_needed = False

    if in_last:
        old_nom = Active_nomination.get_active_nomination(gan_conn, title) # There should never be more than one active nom for an article
        # Note that old_nom will be None if there was a problem retrieving it.
        # Here if the old_nom is a lower page number than the new nom, we are probably in a situation where the old nom has been passed/failed and a new nom created between runs.
        # This can happen if a review has been found to be invalid and the best thing to do is fail it and renominate.
        # Things to check: two nominee templates? Page number incremented by 1? If OK, destroy nom and run as if we were just doing the fail pass. That will get rid of the record
        # in active_nominations and the next pass will pick up the new nom.

    if not in_cat and not in_last:
        # This means the nomination was removed before the loop reached this one, and it wasn't found in the last pass, so we can ignore it.
        # In the future this would be a place to put code to find out if it was reviewed and passed/failed inside 20 minutes
        continue

    if in_cat:
        GAN.log(gan_conn, "GANbot:all_articles loop", title, "In the in_cat branch")
        params = GAN.get_params(gan_conn, talk_page)
        GAN.log(gan_conn, "GANbot:all_articles loop", title, "Retrieved params: " + str(params))
        if params is None:
            # No Nom object exists for this article yet (and the page number could not be
            # parsed), so the error names the article only.
            GA_config.current_errors.append("Malformed GA nomination for [[" + title + "]]")
            continue
        # The return value is not used below; presumably check_params records any
        # problems itself -- TODO confirm.
        param_errors = GAN.check_params(params, title)
        nominator = params['nominator']
        status = params['status']
        page_num = params['page']
        subtopic = params['subtopic']
        # Map a variant subtopic name to its canonical form if there is one
        if subtopic.lower() in Subtopic.subtopic_var_dict.keys():
            subtopic = Subtopic.subtopic_var_dict[subtopic.lower()]
        # Unrecognised subtopics are filed under Miscellaneous
        topic = 'Miscellaneous'
        if subtopic in subtopics.keys():
            topic = subtopics[subtopic].topic.name
        nomination_ts = params['timestamp']
        note = params['note']
        shortdesc = params['shortdesc']
        nom = Nom(gan_conn, topic, subtopic, title, status, page_num, nominator, nomination_ts, note, shortdesc)
        noms.add(nom)
        #topic_update_needed[topic] = True # Use this to force a rewrite of all the topic pages; comment out when not needed

        if nom.review_page_exists == False and nom.status != '':
            GAN.log(gan_conn, "GANbot:in_cat", title, "no review page and inconsistent status")
            # Here it appears a review was started but the page was deleted, or else the parameter was manually set to "onreview" or something similar but no review was actually started.
            # In this case we should set the review status in the database to '', since the existence of the review page is more important than the status parameter.
            # We also need to write an error so someone can go clean up the review page.
            nom.update_status('')
            # NOTE(review): status_string is read after update_status(''); if update_status also
            # refreshes status_string this message will show the blank status -- confirm.
            GA_config.current_errors.append("No review page exists for [[" + title + "]]/" + str(nom.page_num) + " but status is set to " + nom.status_string + "\n")

        if title not in last_run_articles or old_nom is None:
            GAN.log(gan_conn, "GANbot:in_cat", title, "This is a new nomination")
            # Here we know this is the first time we've seen this nomination, or else the active_nomination can't be found so we treat it as new
            update_needed = True
            topic_update_needed[topic] = True
            new_nomination_state = True
            nom.new_status_message(nom.status, None)
            if nom.review_page_exists:
                GAN.log(gan_conn, "GANbot:in_cat", title, "New nomination and review page already exists; setting transclusion_needed and new_review to True")
                new_review = True
                transclusion_needed = True
                if nom.status in ['H','2']: # It can't be passed or failed since it's still nominated
                    tell_nominator_needed = True
        else:
            GAN.log(gan_conn, "GANbot:in_cat", title, "This nomination is not new")
            # Here we have both a current nomination and an older version of it, so we need to compare and see if there are any updates
            matches = nom.compare(old_nom)
            if False in matches.values():
                GAN.log(gan_conn, "GANbot:in_cat", title, "Pre-existing nomination and something has changed")
                # Here we know something has changed
                update_needed = True
                topic_update_needed[topic] = True
                new_nomination_state = True
                if nom.page_num != str(old_nom['page']):
                    GAN.log(gan_conn, "GANbot:in_cat:page number has changed", title, "Two nominations found with different page numbers (" + str(old_nom['page']) + " and " + str(nom.page_num) + ")")
                    # If we're here, the page number has changed, and we need to treat the previous nom as a fail, and the new nom as new
                    # This is the only situation in which we need to have two Nom objects created. We already have nom as the one in cat (the new one), so we'll create
                    # another from the active nomination table/last run data.
                    # If the previous version never had a review page created, it was removed; else it was failed.
                    last_run_new_status = 'X'
                    last_run_nom = Nom(gan_conn, topic, old_nom['subtopic'], old_nom['title'], last_run_new_status, old_nom['page'], old_nom['nominator'], old_nom['nomination_ts'], old_nom['note'], old_nom['shortdesc'])
                    if last_run_nom.review_page_exists:
                        GAN.log(gan_conn, "GANbot:in_cat:page number has changed", title, "Old nomination has a review page so failing it")
                        last_run_nom.new_status_message('F', old_nom['status'])
                        last_run_nom.update_status('F')
                    # NOTE(review): the next three calls are placed outside the "review page exists"
                    # test so that a removed ('X') old nomination is also closed out; confirm this
                    # nesting against the deployed script.
                    last_run_nom.save_nomination_state(gan_conn, name_changes, wbgan, review_stats)
                    Active_nomination.update_active_nominations(gan_conn) # Reload the active nominations table
                    last_run_nom.tell_nominator() # What if this old nom was never transcluded?
                    # Now the old nomination is marked as fail in the database, and we run the same code we would have for no record found in last_run:
                    # Note that this should probably be replaced with a class method which is also called first time round, above.
                    nom.new_status_message(nom.status, None)
                    if nom.review_page_exists:
                        GAN.log(gan_conn, "GANbot:in_cat:page number has changed", title, "Review page already exists for the new nomination; setting transclusion_needed and new_review to True")
                        new_review = True
                        transclusion_needed = True
                        if nom.status == 'H': # It can't be passed or failed since it's still nominated
                            tell_nominator_needed = True
                else:
                    # Page number unchanged
                    if nom.reviewer != old_nom['reviewer'] and nom.reviewer is not None:
                        # Reviewer changed
                        tell_nominator_needed = True
                        new_review = True
                    if nom.review_page_exists == True and old_nom['status'] == '':
                        GAN.log(gan_conn, "GANbot:in_cat:match values is False", title, "Nomination has updated parameters, the review page exists, and the old status is blank: setting transclusion_needed and tell_nominator_needed to True")
                        tell_nominator_needed = True
                        transclusion_needed = True
                    if matches['status'] == False:
                        # Status changed
                        nom.new_status_message(nom.status, old_nom['status'])
                        nom.update_timestamps(old_nom['status'])
                        if nom.status in ['P','F','H'] and nom.status != old_nom['status']:
                            tell_nominator_needed = True
                    else:
                        # Status unchanged, so something else differs.
                        # Note that if the review page exists, the status will have been set to "onreview" even if that's not on the page, because the review page trumps the status.
                        mismatches = [x for x in matches.keys() if matches[x] == False]
                        mismatches_string = "/".join(mismatches)
                        # Here the mismatches could be because we're now reviewing or have deleted a review page
                        if (nom.reviewer is None or nom.reviewer == '') and old_nom['reviewer'] != '':
                            # No longer on review
                            nom.edit_summary = "No longer on review [[" + nom.title + "]]"
                        elif transclusion_needed == False:
                            if nom.reviewer is not None and nom.reviewer != '' and old_nom['reviewer'] == '':
                                # On review -- I think this path gets replicated by the code in transclude()
                                nom.edit_summary = "On review [[" + nom.title + "]] by " + nom.reviewer
                            else:
                                nom.edit_summary = "Updated [[" + nom.title + "]] (" + mismatches_string + ")"
            elif nom.status == '' and nom.review_page_exists:
                GAN.log(gan_conn, "GANbot:in_cat:match values is True", title, "Nomination has no new parameters but the review page now exists; setting transclusion_needed, new_review, tell_nominator_needed, and new_nomination_state to True")
                # This means it's newly under review
                new_review = True
                tell_nominator_needed = True
                transclusion_needed = True
                new_nomination_state = True
    else:
        # Here it's not in cat but is in last_run, so it's probably been passed/failed/removed.
        # Another possibility is that the GA review page has been deleted.
        # Set a flag for inserting a nomination record
        update_needed = True
        GAN.log(gan_conn, "GANbot:in_last", title, "getting active nomination from DB")
        row = Active_nomination.get_active_nomination(gan_conn, title)
        if row is None or old_nom is None:
            # The active nomination record could not be retrieved. Both records are
            # subscripted below, so report the problem and move on to the next article
            # instead of stopping the whole run with a TypeError.
            GAN.log(gan_conn, "GANbot:in_last", title, "Could not retrieve active nomination; skipping")
            GA_config.current_errors.append("Could not retrieve the active nomination record for [[" + title + "]]")
            continue
        if row['subtopic'] in subtopics.keys():
            topic = subtopics[row['subtopic']].topic.name
        else:
            topic = 'Miscellaneous'
        topic_update_needed[topic] = True
        nom = Nom(gan_conn, topic, row['subtopic'], row['title'], row['status'], row['page'], row['nominator'], row['nomination_ts'], row['note'], row['shortdesc'])
        new_nomination_state = True
        new_status = 'F' # It's a fail if it didn't pass and wasn't removed. Note: could add a warning here if there's no fail template or article history record
        tell_nominator_needed = True
        if Nom.is_a_GA(talk_page):
            new_status = 'P'
            promotion_needed = True
            tell_nominator_needed = True
        elif not nom.review_page_exists:
            # It was never reviewed and didn't pass, so it must have been removed
            new_status = 'X'
            tell_nominator_needed = False
        if old_nom['status'] == '' and new_status != 'X':
            # If it was passed or failed and was last recorded as unreviewed we have to transclude the review
            transclusion_needed = True
            new_review = True
        nom.new_status_message(new_status, old_nom['status'])
        nom.update_status(new_status)
        update_log.append(nom.edit_summary)
        topic_PFX_log[topic].append(nom.edit_summary)

    # Now we're past all the setup and can do whatever the flags tell us to do.
    #GAN.log(gan_conn,"GANbot:after setup, about to execute flags",title,"Flags are: " + "transclusion_needed " + str(transclusion_needed) + "; new_review " + str(new_review) + "; new_nomination_state " + str(new_nomination_state) + "; promotion_needed " + str(promotion_needed) + "; tell_nominator_needed " + str(tell_nominator_needed))
    # Note that nom is created in the cat_noms section if it exists there, but if it doesn't it's created in the last_run_noms section
    if transclusion_needed:
        nom.transclude(gan_conn)
    if new_review:
        nom.add_a_review(gan_conn)
    if new_nomination_state:
        nom.save_nomination_state(gan_conn, name_changes, wbgan, review_stats)
        # NOTE(review): nested here so the table is reloaded only when a state was saved;
        # confirm against the deployed script.
        Active_nomination.update_active_nominations(gan_conn) # Reload the active nominations table
    if promotion_needed:
        nom.add_GA_star(gan_conn) # Also updates the oldid if needed
    if tell_nominator_needed:
        nom.tell_nominator()

# Outside the loop, all noms processed, so here we should be able to rebuild some things
Active_nomination.update_active_nominations(gan_conn) # Reload the active nominations table
Review_stats.update_statistics(gan_conn) # update the GA reviewing stats table in SQL and update the GA reviewing stats page on Wikipedia
review_stats = Review_stats(gan_conn) # Refresh the GA reviewing stats dictionary from the database

# The next loops are going to generate the GAN page
target = 'Wikipedia'
sort_order = GA_config.sort_order
print_list = []
collapsed = [] # This will hold noms that should be displayed in a collapsed list at the end of each topic
noms_by_user = {}

# Get the user information before trying to print anything, since this can change sort order
for n in noms.noms:
    # As we look at each nomination, add the nominator and reviewer GA and review counts to the user dictionary if they're not already in it.
    n.update_users(gan_conn, users, name_changes, review_stats, wbgan)
    # Build a dictionary of noms by user as we go for use in building the collapse list
    noms_by_user.setdefault(n.nominator, []).append({'title': n.title, 'nomination_ts': n.nomination_ts})

# Sort each user's list by the nomination date; everything after a user's first twenty nominations is collapsed
for u in noms_by_user:
    noms_by_user[u] = sorted(noms_by_user[u], key=operator.itemgetter('nomination_ts'))
    for c in noms_by_user[u][20:]:
        collapsed.append(c['title'])
collapsed_titles = set(collapsed) # for fast membership tests in the print loop below

# If something is in collapsed, don't print it, put it in collapsed_topic. Then at the end of the topic, print those inside a collapse template.

# Now assemble the GARs
# Each row is of the form
# {{GARentry|Terry Pratchett|1}} GA nominator: [[User:Example]]; GAR created: <date>
# Create a dictionary with {'<subtopic>': {article_title: {GARpage=, orignom= , GARdate= }}
# Then in the print code, add the GARs after the collapsed sections
gar_dict = GAH.get_gar_dict(gan_conn, config)

for k in topics.keys(): # We print the sections in topic order
    topic = topics[k]
    topic_print_list = []
    topic_update_log = topic_PFX_log[k]
    topic_noms = [x for x in noms.noms if x.topic == topic.name]
    print_list.append(topic.header(target))
    topic_print_list.append(topic.header(target))

    for subtopic in topic.subtopics:
        subtopic_noms = [x for x in topic_noms if x.subtopic == subtopic.name]
        print_list.append(subtopic.section_header())
        topic_print_list.append(subtopic.section_header())
        subtopic_noms = sorted(subtopic_noms, key=operator.attrgetter(sort_order), reverse=False)
        subtopic_gars = []
        if subtopic.name in gar_dict.keys():
            subtopic_gars = gar_dict[subtopic.name]

        # Print the visible nominations, holding back the collapsed ones
        collapsed_topic = []
        for nom in subtopic_noms:
            if nom.title in collapsed_titles:
                collapsed_topic.append(nom)
            else:
                nom_text = nom.print_GAN_entry()
                print_list.append(nom_text)
                topic_print_list.append(nom_text)
                if nom.edit_summary != "No change" and nom.edit_summary != "":
                    update_log.append(nom.edit_summary)
                    topic_update_log.append(nom.edit_summary)

        # Print the held-back nominations inside a collapse template
        if len(collapsed_topic) > 0:
            print_list.append("{{cot|Additional nominations}}")
            print_list.append("* These are not displayed as the nominator currently has more than twenty active nominations")
            topic_print_list.append("{{cot|Additional nominations}}")
            topic_print_list.append("* These are not displayed as the nominator currently has more than twenty active nominations")
            for nom in collapsed_topic:
                nom_text = nom.print_GAN_entry()
                print_list.append(nom_text)
                topic_print_list.append(nom_text)
                if nom.edit_summary != "No change" and nom.edit_summary != "":
                    update_log.append(nom.edit_summary)
                    topic_update_log.append(nom.edit_summary)
            print_list.append("{{cob}}")
            topic_print_list.append("{{cob}}")

        # Print the reassessments for this subtopic (these go on the main GAN page only)
        if len(subtopic_gars) > 0:
            print_list.append("=== " + subtopic.name + " reassessments ===\n")
            # List the reassessments in GARdate order. Entries with no date sort last; the
            # two-part key keeps None from being compared with a real date.
            gars = sorted(subtopic_gars, key=lambda d: (d[1]['GARdate'] is None, d[1]['GARdate']))
            for gar in gars:
                gar_line = "# {{GARentry|" + gar[0] + "|" + gar[1]['GARpage']
                shortdesc = gar[1]['shortdesc']
                if shortdesc is not None:
                    gar_line += "|shortdesc=" + shortdesc
                gar_line += "}}"
                orignom = gar[1]['orignom']
                if orignom is not None:
                    gar_line += " GA nominator: [[User:" + orignom + "|" + orignom + "]]"
                    user = pywikibot.User(site, "User:" + orignom)
                    if user is not None:
                        # Read last_edit once and reuse it rather than fetching it a second time
                        ule = user.last_edit
                        if ule is not None:
                            ule_ts = ule[2]
                            ule_dt = GAH.convert_timestamp_to_datetime(ule_ts)
                            ule_delta = datetime.datetime.utcnow() - ule_dt
                            if ule_delta.days > 21:
                                gar_line += " (inactive for " + str(ule_delta.days) + " days)"
                    # NOTE(review): the full stop is taken to close the nominator clause; confirm this nesting.
                    gar_line += "."
                GARdate = gar[1]['GARdate']
                if GARdate is not None:
                    gar_line += " GAR created: " + GARdate.strftime("%H:%M, %-d %B %Y (UTC)")
                print_list.append(gar_line)

    # Save topic page
    page = pywikibot.Page(site, GA_config.strings['GA topic pages base'] + topic.name)
    topic_page_text = '\n'.join(topic_print_list)
    page.text = topic_page_text
    topic_update_text = "\n".join(topic_update_log)
    # The log always starts with an empty entry, so test the joined text rather than the list length
    if not topic_update_text.strip():
        topic_update_text = "Update metrics"
    if topic_update_needed[topic.name] == True:
        page.save(topic_update_text)

GAN.write_backlog_by_sort_order(gan_conn, noms.noms, sort_order)
GAN.mark_superseded_reviews(gan_conn)

GAN.log(gan_conn, "GANbot", "N/A", "About to run integrity checks")
GAN.integrity_checks(gan_conn)

# Collect any errors raised during the run for the GAN page, the edit summary and the errors page
error_edit_summary = ''
current_errors_text = ''
if len(GA_config.current_errors) > 0:
    current_errors_text = '\n* '.join(GA_config.current_errors)
    error_edit_summary = "Errors listed! "

# Assemble the main GAN page: fixed top text, the generated sections, fixed bottom text, then any errors
page_text = '\n'.join(print_list)
page = pywikibot.Page(site, GA_config.strings['GAN page']) # UPDATE ON GO LIVE
page.text = GA_config.strings['GAN top text'] + page_text + GA_config.strings['GAN bottom text']
if len(GA_config.current_errors) > 0:
    page.text = page.text + "\n==Errors==\n" + current_errors_text
update_text = "\n".join(update_log)
# The log always starts with an empty entry, so test the joined text rather than the list length
if not update_text.strip():
    update_text = "Update metrics"
#update_needed = True # Uncomment this to force a page update to test layout and sort changes; the page will not update otherwise unless a nomination changes status in some way.
# Final phase: save the main GAN page (only if something changed during this run),
# write the errors page, flush the audit trail, then build and save the sortable
# single-table view of the nominations.
edit_summary = error_edit_summary + update_text

GAN.log(gan_conn, "GANbot", "N/A", "About to save main GAN page")
if update_needed:
    page.save(edit_summary)

GAN.log(gan_conn, "GANbot", "N/A", "About to write errors page")
GAN.write_errors_page(current_errors_text)

GAN.log(gan_conn, "GANbot", "N/A", "About to flush the audit trail")
GAN.flush_audit(gan_conn)

GAN.log(gan_conn, "GANbot", "N/A", "About to write sortable table")

# Now write the sortable table: the table opener and one header cell per column ...
print_list = [
    '{| class="wikitable sortable"',
    '!Article',
    '!Status',
    '!Nominator',
    '!Subtopic',
    '!{{Tooltip|Age|Age in days}}',
    '!{{Tooltip|Revs|Reviews}}',
    '!{{Tooltip|GAs|Promoted GAs}}',
    '!{{Tooltip|R/G|Reviews per GA}}',
    '!Note',
]

# ... then one table row per nomination, with the cells in the same order as the headers.
for entry in noms.noms:
    # A nomination with a blank status gets an empty status cell
    if entry.status == "":
        status_cell = "|"
    else:
        status_cell = "|" + entry.status_string
    print_list.extend([
        "|-",
        "|[[" + entry.title + "]]",
        status_cell,
        "| [[User:" + entry.nominator + "|" + entry.nominator + "]]",
        "|[[Wikipedia:Good article nominations#" + entry.subtopic + "|" + entry.subtopic + "]]",
        "|" + str(entry.age_in_days),
        "|" + str(entry.nominator_reviews),
        "|" + str(entry.nominator_GAs),
        "|" + str(entry.R_over_G),
        "|" + entry.note,
    ])
print_list.append("|}")

sortable_table_text = "\n".join(print_list)
page = pywikibot.Page(site, GA_config.strings['GAN as a single table'])
page.text = sortable_table_text
# The table page is saved under the same condition and edit summary as the main GAN page
if update_needed:
    page.save(edit_summary)