User:ARandomName123/update bot

# For use in WikiProject Unreferenced articles backlog drives
# Based on a Python script written by User:Broc, adapted to pywikibot by User:ARandomName123
# Please see https://www.mediawiki.org/wiki/Manual:Pywikibot/Installation to configure/login pywikibot
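# Dependencies beyond pywikibot: this script also needs the requests and beautifulsoup4
# packages (e.g. pip install requests beautifulsoup4)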

import pywikibot
import requests
import re
from bs4 import BeautifulSoup
site = pywikibot.Site('en', 'wikipedia')  # The site we want to run our bot on
page = pywikibot.Page(site, 'User:ARandomName123/sandbox')

def get_n1_value(username, session):
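    """Return (count, page list) of unique mainspace pages a user tagged for the FEB24 drive.

    Scrapes the sigma edit-summary search tool, keeps edits whose summary contains
    "feb24" but not "feb24review", and counts distinct page names. Returns
    (None, None) if the request fails or the expected HTML structure is missing.
    """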
    # Construct the URL for the sigma tool
    url = f'https://sigma.toolforge.org/summary.py?name={username}&search=FEB24&max=500&server=enwiki&ns=Main&enddate=20240201&startdate='
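    # Parameter meaning (inferred from the query string): search=FEB24 matches the drive tag
    # in edit summaries, server/ns restrict results to enwiki mainspace, max caps results at
    # 500, and enddate/startdate bound the date range (startdate is left open here).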

    # Send a GET request to the URL using the session
    response = session.get(url)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Parse the HTML content
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all unordered lists on the page
        ul_elements = soup.find_all('ul')

        # Check if there is a second <ul>
        if len(ul_elements) >= 2:
            second_ul_element = ul_elements[1]

            # Find all list items in the second <ul>
            li_elements = second_ul_element.find_all('li')

            # List to store unique PAGENAMEs
            unique_pagename_list = []

            # Iterate through list items
            for li_element in li_elements:
                # Find all <a> elements within the <li>
                a_elements = li_element.find_all('a')

                # Check if there are at least 4 <a> elements
                if len(a_elements) >= 4:
                    # Get the URL in the 4th <a> element
                    pagename_url = a_elements[3].get('href', '')

                    # Extract PAGENAME from the URL (you may need to adjust this based on the actual URL structure)
                    pagename_match = re.search(r'/wiki/([^?&#]+)', pagename_url)
                    if pagename_match:
                        pagename = pagename_match.group(1)

                        # Find the <i> tag following the last <a> tag
                        i_tag = a_elements[-1].find_next('i')

                        # Check if "feb24" is contained in the <i> tag and "feb24review" is not
                        if i_tag and 'feb24' in i_tag.get_text().lower() and 'feb24review' not in i_tag.get_text().lower():
                            unique_pagename_list.append(pagename)

            # Print the list of PAGENAMEs per user
            #print(f"User: {username}, PAGENAMEs: {unique_pagename_list}")

            # Count the number of unique PAGENAMEs
            n1_value = len(set(unique_pagename_list))
            return n1_value, unique_pagename_list

    # If the request was not successful or no second <ul> was found, return None
    return None, None

def parse_table(text):
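    """Update the N1 column of the drive leaderboard table in the given wikitext.

    Finds the first 'wikitable sortable' table, reads each row header of the form
    [[User:...]], looks up that user's count via get_n1_value(), and overwrites the
    cell two lines below the header. Returns the full page text.
    """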
    # Create a session for making requests
    session = requests.Session()

    # Keep track of processed usernames to avoid duplicate processing
    processed_usernames = set()

    # Find the starting index of the table
    start_index = text.find('{| class="wikitable sortable"')

    # Check if the table exists in the text
    if start_index != -1:
        # Find the ending index of the table
        end_index = text.find('|}', start_index)

        # Extract the table content
        table_content = text[start_index:end_index]

        # Split the table content into lines
        lines = table_content.split('\n')

        # Iterate through lines
        for i, line in enumerate(lines):
            if '![[User:' in line:
                # Extract the username from the line (stop at "|" so piped links work too)
                username = re.search(r'\[\[User:([^]|]+)', line).group(1).strip()

                # Check if the username has already been processed
                if username not in processed_usernames:
                    # Get N1 value using the external function
                    n1_value, _ = get_n1_value(username, session)

                    if n1_value is not None:
                        # Two lines after the username
                        n1_line_index = i + 2
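                        # Assumed row layout in the wikitext (the i + 2 offset relies on it):
                        #   ! [[User:Example]]   <- line i, the row header
                        #   | <other cell>       <- line i + 1
                        #   | <N1 value>         <- line i + 2, overwritten below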

                        # One more line after N1
                        #n2_line_index = i + 3

                        if n1_line_index < len(lines):
                            # Update the N1 value in the line
                            lines[n1_line_index] = f"| {n1_value}"

                            # Debug prints
                            #print(f"User: {username}")
                            #print(f"New N1 line: {lines[n1_line_index]}")
                            #print(f"New N2 line: {lines[n2_line_index]}")

                            # Mark the username as processed
                            processed_usernames.add(username)

                            # Introduce a delay between calls (adjust as needed)
                            #time.sleep(0.1)  # 0.1-second delay

        # Join the lines back together
        modified_table = '\n'.join(lines)

        # Replace the original table with the modified one
        text = text[:start_index] + modified_table + text[end_index:]

    # Return the page text (unchanged if no table was found) so the caller never gets None
    return text

# Extract the page content
content = page.text
newText = parse_table(content)

# Replace the old text with the updated version
page.text = newText

# Save the page
page.save('pwb leaderboard update test run')
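
# To run: save this script under any filename (update_bot.py is illustrative) and execute it
# with Python after configuring and logging in pywikibot as described in the manual above.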